ソースを参照

configurable rope frequency parameters

Michael Yang 1 年前
コミット
b9f4d67554
2 ファイル変更、27 行追加、21 行削除
  1. 25 21
      api/types.go
  2. 2 0
      llama/llama.go

+ 25 - 21
api/types.go

@@ -147,19 +147,21 @@ type Options struct {
 	UseNUMA bool `json:"numa,omitempty"`
 
 	// Model options
-	NumCtx        int  `json:"num_ctx,omitempty"`
-	NumKeep       int  `json:"num_keep,omitempty"`
-	NumBatch      int  `json:"num_batch,omitempty"`
-	NumGQA        int  `json:"num_gqa,omitempty"`
-	NumGPU        int  `json:"num_gpu,omitempty"`
-	MainGPU       int  `json:"main_gpu,omitempty"`
-	LowVRAM       bool `json:"low_vram,omitempty"`
-	F16KV         bool `json:"f16_kv,omitempty"`
-	LogitsAll     bool `json:"logits_all,omitempty"`
-	VocabOnly     bool `json:"vocab_only,omitempty"`
-	UseMMap       bool `json:"use_mmap,omitempty"`
-	UseMLock      bool `json:"use_mlock,omitempty"`
-	EmbeddingOnly bool `json:"embedding_only,omitempty"`
+	NumCtx             int     `json:"num_ctx,omitempty"`
+	NumKeep            int     `json:"num_keep,omitempty"`
+	NumBatch           int     `json:"num_batch,omitempty"`
+	NumGQA             int     `json:"num_gqa,omitempty"`
+	NumGPU             int     `json:"num_gpu,omitempty"`
+	MainGPU            int     `json:"main_gpu,omitempty"`
+	LowVRAM            bool    `json:"low_vram,omitempty"`
+	F16KV              bool    `json:"f16_kv,omitempty"`
+	LogitsAll          bool    `json:"logits_all,omitempty"`
+	VocabOnly          bool    `json:"vocab_only,omitempty"`
+	UseMMap            bool    `json:"use_mmap,omitempty"`
+	UseMLock           bool    `json:"use_mlock,omitempty"`
+	EmbeddingOnly      bool    `json:"embedding_only,omitempty"`
+	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
+	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 
 
 	// Predict options
 	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
@@ -261,14 +263,16 @@ func DefaultOptions() Options {
 
 
 		UseNUMA: false,
 
-		NumCtx:   2048,
-		NumBatch: 512,
-		NumGPU:   1,
-		NumGQA:   1,
-		LowVRAM:  false,
-		F16KV:    true,
-		UseMMap:  true,
-		UseMLock: false,
+		NumCtx:             2048,
+		NumBatch:           512,
+		NumGPU:             1,
+		NumGQA:             1,
+		LowVRAM:            false,
+		F16KV:              true,
+		UseMMap:            true,
+		UseMLock:           false,
+		RopeFrequencyBase:  10000.0,
+		RopeFrequencyScale: 1.0,
 
 
 		RepeatLastN:      64,
 		RepeatPenalty:    1.1,

+ 2 - 0
llama/llama.go

@@ -142,6 +142,8 @@ func New(model string, opts api.Options) (*LLM, error) {
 	params.use_mmap = C.bool(llm.UseMMap)
 	params.use_mlock = C.bool(llm.UseMLock)
 	params.embedding = C.bool(llm.EmbeddingOnly)
+	params.rope_freq_base = C.float(llm.RopeFrequencyBase)
+	params.rope_freq_scale = C.float(llm.RopeFrequencyScale)
 	llm.params = &params
 
 	cModel := C.CString(model)