no rope parameters

Michael Yang 1 year ago
parent commit be517e491c
4 changed files with 0 additions and 14 deletions
  1. api/types.go (+0 -4)
  2. convert/convert.go (+0 -1)
  3. convert/mistral.go (+0 -1)
  4. llm/server.go (+0 -8)

api/types.go (+0 -4)

@@ -121,8 +121,6 @@ type Runner struct {
 	VocabOnly          bool    `json:"vocab_only,omitempty"`
 	UseMMap            bool    `json:"use_mmap,omitempty"`
 	UseMLock           bool    `json:"use_mlock,omitempty"`
-	RopeFrequencyBase  float32 `json:"rope_frequency_base,omitempty"`
-	RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
 	NumThread          int     `json:"num_thread,omitempty"`
 }
 
@@ -383,8 +381,6 @@ func DefaultOptions() Options {
 		Runner: Runner{
 			// options set when the model is loaded
 			NumCtx:             2048,
-			RopeFrequencyBase:  10000.0,
-			RopeFrequencyScale: 1.0,
 			NumBatch:           512,
 			NumGPU:             -1, // -1 here indicates that NumGPU should be set dynamically
 			NumGQA:             1,

convert/convert.go (+0 -1)

@@ -32,7 +32,6 @@ type Params struct {
 	AttentionHeads   int      `json:"num_attention_heads"` // n_head
 	KeyValHeads      int      `json:"num_key_value_heads"`
 	NormEPS          float64  `json:"rms_norm_eps"`
-	RopeFreqBase     float64  `json:"rope_theta"`
 	BoSTokenID       int      `json:"bos_token_id"`
 	EoSTokenID       int      `json:"eos_token_id"`
 	HeadDimension    int      `json:"head_dim"`

convert/mistral.go (+0 -1)

@@ -144,7 +144,6 @@ func (m *MistralModel) WriteGGUF() (string, error) {
 		"llama.attention.head_count":             uint32(m.Params.AttentionHeads),
 		"llama.attention.head_count_kv":          uint32(m.Params.KeyValHeads),
 		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
-		"llama.rope.freq_base":                   float32(m.Params.RopeFreqBase),
 		"general.file_type":                      uint32(1),
 		"tokenizer.ggml.model":                   "llama",
 

llm/server.go (+0 -8)

@@ -172,14 +172,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
 	}
 
-	if opts.RopeFrequencyBase > 0 {
-		params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
-	}
-
-	if opts.RopeFrequencyScale > 0 {
-		params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
-	}
-
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])
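
With the two conditional blocks above removed, the server no longer forwards rope overrides to llama.cpp via --rope-freq-base / --rope-freq-scale, and the converter no longer writes llama.rope.freq_base; presumably the runtime now relies on whatever rope values are carried in the model's GGUF metadata, or on llama.cpp's own defaults. As a minimal sketch (not part of this commit), the Go program below marshals the kind of options payload a client could previously send; the rope_frequency_* keys and their old defaults (10000.0 and 1.0) are taken from the api/types.go hunk, while the request shape itself is an assumption for illustration.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Illustrative options map: the JSON tags and old defaults come from
	// the api/types.go hunk above. After this commit, the two rope_* keys
	// no longer correspond to any Runner field.
	opts := map[string]any{
		"num_ctx":              2048,
		"rope_frequency_base":  10000.0, // previously RopeFrequencyBase
		"rope_frequency_scale": 1.0,     // previously RopeFrequencyScale
	}

	b, err := json.Marshal(opts)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
}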