Michael Yang 1 year ago
Commit
ad3a7d0e2c
2 files changed, 3 insertions(+), 0 deletions(-)
  1. api/types.go    +2 -0
  2. llama/llama.go  +1 -0

+ 2 - 0
api/types.go

@@ -153,6 +153,7 @@ type Options struct {
 	NumCtx        int  `json:"num_ctx,omitempty"`
 	NumKeep       int  `json:"num_keep,omitempty"`
 	NumBatch      int  `json:"num_batch,omitempty"`
+	NumGQA        int  `json:"num_gqa,omitempty"`
 	NumGPU        int  `json:"num_gpu,omitempty"`
 	MainGPU       int  `json:"main_gpu,omitempty"`
 	LowVRAM       bool `json:"low_vram,omitempty"`
@@ -190,6 +191,7 @@ func DefaultOptions() Options {
 		NumCtx:   2048,
 		NumBatch: 1024,
 		NumGPU:   1,
+		NumGQA:   1,
 		LowVRAM:  false,
 		F16KV:    true,
 		UseMMap:  true,

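With the omitempty tag, the new field is serialized as num_gqa in API request bodies alongside the existing options. Below is a minimal sketch of the resulting JSON, using a trimmed-down stand-in for api.Options (only a few fields are kept, and the value 8 is purely illustrative):

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed stand-in for api.Options; only the fields needed to show the
// new num_gqa key are included.
type Options struct {
	NumCtx   int `json:"num_ctx,omitempty"`
	NumBatch int `json:"num_batch,omitempty"`
	NumGQA   int `json:"num_gqa,omitempty"`
	NumGPU   int `json:"num_gpu,omitempty"`
}

func main() {
	opts := Options{NumCtx: 2048, NumBatch: 1024, NumGQA: 8, NumGPU: 1}
	b, _ := json.Marshal(opts)
	fmt.Println(string(b))
	// prints: {"num_ctx":2048,"num_batch":1024,"num_gqa":8,"num_gpu":1}
}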
+ 1 - 0
llama/llama.go

@@ -127,6 +127,7 @@ func New(model string, opts api.Options) (*LLM, error) {
 	params.seed = C.uint(llm.Seed)
 	params.n_ctx = C.int(llm.NumCtx)
 	params.n_batch = C.int(llm.NumBatch)
+	params.n_gqa = C.int(llm.NumGQA)
 	params.n_gpu_layers = C.int(llm.NumGPU)
 	params.main_gpu = C.int(llm.MainGPU)
 	params.low_vram = C.bool(llm.LowVRAM)
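For context, llama.cpp's n_gqa at the time was the grouped-query-attention factor (attention heads per KV head), added mainly so GQA models like LLaMA-2 70B (factor 8) could load; the default of 1 matches non-GQA models. The sketch below mirrors the assignment block in New using plain-Go stand-ins instead of cgo types (struct and field names here are hypothetical), just to show where NumGQA ends up:

package main

import "fmt"

// Plain-Go stand-ins for the cgo llama_context_params fields touched in
// the diff above (the real code assigns C.int/C.uint/C.bool values).
type contextParams struct {
	seed       uint32
	nCtx       int
	nBatch     int
	nGQA       int
	nGPULayers int
	mainGPU    int
	lowVRAM    bool
}

type options struct {
	Seed     int
	NumCtx   int
	NumBatch int
	NumGQA   int
	NumGPU   int
	MainGPU  int
	LowVRAM  bool
}

// newParams copies each option straight into the corresponding llama
// parameter, including the new NumGQA -> n_gqa mapping.
func newParams(o options) contextParams {
	return contextParams{
		seed:       uint32(o.Seed),
		nCtx:       o.NumCtx,
		nBatch:     o.NumBatch,
		nGQA:       o.NumGQA,
		nGPULayers: o.NumGPU,
		mainGPU:    o.MainGPU,
		lowVRAM:    o.LowVRAM,
	}
}

func main() {
	// NumCtx/NumBatch/NumGPU/NumGQA match DefaultOptions above; Seed is arbitrary.
	p := newParams(options{Seed: 42, NumCtx: 2048, NumBatch: 1024, NumGQA: 1, NumGPU: 1})
	fmt.Printf("%+v\n", p)
}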