|
@@ -109,19 +109,19 @@ type Options struct {
|
|
|
|
|
|
// Runner options which must be set when the model is loaded into memory
|
|
|
type Runner struct {
|
|
|
- UseNUMA bool `json:"numa,omitempty"`
|
|
|
- NumCtx int `json:"num_ctx,omitempty"`
|
|
|
- NumBatch int `json:"num_batch,omitempty"`
|
|
|
- NumGQA int `json:"num_gqa,omitempty"`
|
|
|
- NumGPU int `json:"num_gpu,omitempty"`
|
|
|
- MainGPU int `json:"main_gpu,omitempty"`
|
|
|
- LowVRAM bool `json:"low_vram,omitempty"`
|
|
|
- F16KV bool `json:"f16_kv,omitempty"`
|
|
|
- LogitsAll bool `json:"logits_all,omitempty"`
|
|
|
- VocabOnly bool `json:"vocab_only,omitempty"`
|
|
|
- UseMMap bool `json:"use_mmap,omitempty"`
|
|
|
- UseMLock bool `json:"use_mlock,omitempty"`
|
|
|
- NumThread int `json:"num_thread,omitempty"`
|
|
|
+ UseNUMA bool `json:"numa,omitempty"`
|
|
|
+ NumCtx int `json:"num_ctx,omitempty"`
|
|
|
+ NumBatch int `json:"num_batch,omitempty"`
|
|
|
+ NumGQA int `json:"num_gqa,omitempty"`
|
|
|
+ NumGPU int `json:"num_gpu,omitempty"`
|
|
|
+ MainGPU int `json:"main_gpu,omitempty"`
|
|
|
+ LowVRAM bool `json:"low_vram,omitempty"`
|
|
|
+ F16KV bool `json:"f16_kv,omitempty"`
|
|
|
+ LogitsAll bool `json:"logits_all,omitempty"`
|
|
|
+ VocabOnly bool `json:"vocab_only,omitempty"`
|
|
|
+ UseMMap bool `json:"use_mmap,omitempty"`
|
|
|
+ UseMLock bool `json:"use_mlock,omitempty"`
|
|
|
+ NumThread int `json:"num_thread,omitempty"`
|
|
|
}
|
|
|
|
|
|
type EmbeddingRequest struct {
|
|
@@ -137,10 +137,11 @@ type EmbeddingResponse struct {
|
|
|
}
|
|
|
|
|
|
type CreateRequest struct {
|
|
|
- Model string `json:"model"`
|
|
|
- Path string `json:"path"`
|
|
|
- Modelfile string `json:"modelfile"`
|
|
|
- Stream *bool `json:"stream,omitempty"`
|
|
|
+ Model string `json:"model"`
|
|
|
+ Path string `json:"path"`
|
|
|
+ Modelfile string `json:"modelfile"`
|
|
|
+ Stream *bool `json:"stream,omitempty"`
|
|
|
+ Quantization string `json:"quantization,omitempty"`
|
|
|
|
|
|
// Name is deprecated, see Model
|
|
|
Name string `json:"name"`
|
|
@@ -380,16 +381,16 @@ func DefaultOptions() Options {
|
|
|
|
|
|
Runner: Runner{
|
|
|
// options set when the model is loaded
|
|
|
- NumCtx: 2048,
|
|
|
- NumBatch: 512,
|
|
|
- NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
|
|
- NumGQA: 1,
|
|
|
- NumThread: 0, // let the runtime decide
|
|
|
- LowVRAM: false,
|
|
|
- F16KV: true,
|
|
|
- UseMLock: false,
|
|
|
- UseMMap: true,
|
|
|
- UseNUMA: false,
|
|
|
+ NumCtx: 2048,
|
|
|
+ NumBatch: 512,
|
|
|
+ NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
|
|
+ NumGQA: 1,
|
|
|
+ NumThread: 0, // let the runtime decide
|
|
|
+ LowVRAM: false,
|
|
|
+ F16KV: true,
|
|
|
+ UseMLock: false,
|
|
|
+ UseMMap: true,
|
|
|
+ UseNUMA: false,
|
|
|
},
|
|
|
}
|
|
|
}
|