jmorganca 1 month ago
parent
commit
8025781dce

+ 12 - 5
convert/convert_mistral.go

@@ -63,10 +63,8 @@ func (p *mistralModel) KV(t *Tokenizer) ggml.KV {
 		kv["mistral.feed_forward_length"] = cmp.Or(p.IntermediateSize, p.NInner)
 	}
 
-	if headCount := cmp.Or(p.NumAttentionHeads, p.NHead); headCount > 0 {
-		kv["mistral.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
-		kv["mistral.rope.dimension_count"] = p.HiddenSize / headCount
-	}
+	kv["mistral.attention.head_count"] = cmp.Or(p.NumAttentionHeads, p.NHead)
+	kv["mistral.rope.dimension_count"] = p.HiddenSize / cmp.Or(p.NumAttentionHeads, p.NHead)
 
 	if p.RopeTheta > 0 {
 		kv["mistral.rope.freq_base"] = p.RopeTheta
@@ -136,6 +134,13 @@ func (p *mistralModel) Tensors(ts []Tensor) []ggml.Tensor {
 			t.SetRepacker(p.repack)
 		}
 
+		if strings.HasPrefix(t.Name(), "patch_merger.") ||
+			strings.HasPrefix(t.Name(), "pre_mm_projector_output_norm.") ||
+			strings.HasPrefix(t.Name(), "vision_encoder.") ||
+			strings.HasPrefix(t.Name(), "vision_language_adapter.") {
+			continue
+		}
+
 		out = append(out, ggml.Tensor{
 			Name:     t.Name(),
 			Kind:     t.Kind(),
@@ -149,7 +154,7 @@ func (p *mistralModel) Tensors(ts []Tensor) []ggml.Tensor {
 
 func (p *mistralModel) Replacements() []string {
 	return []string{
-		"tok_embeddings.weight", "token_embd",
+		"tok_embeddings", "token_embd",
 		"norm", "output_norm",
 		"layers", "blk",
 		"attention_norm", "attn_norm",
@@ -160,6 +165,8 @@ func (p *mistralModel) Replacements() []string {
 		"feed_forward.w1", "ffn_gate",
 		"feed_forward.w2", "ffn_down",
 		"feed_forward.w3", "ffn_up",
+		"ffn_norm", "ffn_norm",
+		"output", "output",
 	}
 }
 

+ 4 - 1
model/models/mistral/model.go

@@ -37,7 +37,10 @@ func New(c ml.Config) (model.Model, error) {
 
 	m := Model{
 		BytePairEncoding: model.NewBytePairEncoding(
-			c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
+			// TODO: need to set this in the conversion for mistral:
+			// tokenizer.ggml.pretokenizer = [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+
+			c.String("tokenizer.ggml.pretokenizer", `(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
+			// c.String("tokenizer.ggml.pretokenizer", `[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`),
 			&model.Vocabulary{
 				Values: c.Strings("tokenizer.ggml.tokens"),
 				Types:  c.Uints("tokenizer.ggml.token_type"),

+ 1 - 0
model/models/models.go

@@ -4,5 +4,6 @@ import (
 	_ "github.com/ollama/ollama/model/models/gemma2"
 	_ "github.com/ollama/ollama/model/models/gemma3"
 	_ "github.com/ollama/ollama/model/models/llama"
+	_ "github.com/ollama/ollama/model/models/mistral"
 	_ "github.com/ollama/ollama/model/models/mllama"
 )

+ 4 - 0
model/process_text.go

@@ -263,6 +263,10 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
 					continue
 				}
 
+				if id := bpe.vocab.Encode(pair.value); id < 0 {
+					continue
+				}
+
 				merges[pair.a].runes = append(left.runes, right.runes...)
 				merges[pair.b].runes = nil
 

+ 4 - 0
runner/ollamarunner/runner.go

@@ -179,6 +179,10 @@ func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *
 			return nil, nil, err
 		}
 
+		for _, t := range tokens {
+			decoded, _ := s.model.(model.TextProcessor).Decode([]int32{t})
+			fmt.Println("token", t, "decoded", decoded)
+		}
 		for _, t := range tokens {
 			inputs = append(inputs, input.Input{Token: t})
 		}