Michael Yang 9 months ago
parent
commit
df993fa37b

+ 23 - 23
convert/convert.go

@@ -40,13 +40,13 @@ func (Parameters) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (Parameters) specialTypes() []string {
+func (Parameters) specialTokenTypes() []string {
 	return []string{
 		"bos", "eos", "unk", "sep", "pad", "cls", "mask",
 	}
 }
 
-func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []*llm.Tensor) error {
+func (Parameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
 	return llm.WriteGGUF(ws, kv, ts)
 }
 
@@ -54,24 +54,27 @@ type Converter interface {
 	// KV maps parameters to LLM key-values
 	KV(*Tokenizer) llm.KV
 	// Tensors maps input tensors to LLM tensors. Model specific modifications can be done here.
-	Tensors([]Tensor) []*llm.Tensor
+	Tensors([]Tensor) []llm.Tensor
 
 	// tensorName returns the LLM tensor name for a specific input name
 	tensorName(string) string
-	// specialTypes returns any special token types the model uses
-	specialTypes() []string
-	writeFile(io.WriteSeeker, llm.KV, []*llm.Tensor) error
+	// specialTokenTypes returns any special token types the model uses
+	specialTokenTypes() []string
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
 }
 
-func Convert(d string, ws io.WriteSeeker) error {
-	f, err := os.Open(filepath.Join(d, "config.json"))
+// Convert writes an Ollama-compatible model to the provided io.WriteSeeker based on configurations
+// and files it finds in the input path.
+// Supported input model formats include safetensors.
+// Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
+func Convert(path string, ws io.WriteSeeker) error {
+	bts, err := os.ReadFile(filepath.Join(path, "config.json"))
 	if err != nil {
 		return err
 	}
-	defer f.Close()
 
 	var p Parameters
-	if err := json.NewDecoder(f).Decode(&p); err != nil {
+	if err := json.Unmarshal(bts, &p); err != nil {
 		return err
 	}
 
@@ -79,28 +82,23 @@ func Convert(d string, ws io.WriteSeeker) error {
 		return errors.New("unknown architecture")
 	}
 
-	var c Converter
+	var conv Converter
 	switch p.Architectures[0] {
 	case "LlamaForCausalLM", "MistralForCausalLM":
-		c = &llama{}
+		conv = &llama{}
 	case "MixtralForCausalLM":
-		c = &mixtral{}
+		conv = &mixtral{}
 	case "GemmaForCausalLM":
-		c = &gemma{}
+		conv = &gemma{}
 	default:
 		return errors.New("unsupported architecture")
 	}
 
-	bts, err := os.ReadFile(filepath.Join(d, "config.json"))
-	if err != nil {
-		return err
-	}
-
-	if err := json.Unmarshal(bts, c); err != nil {
+	if err := json.Unmarshal(bts, conv); err != nil {
 		return err
 	}
 
-	t, err := parseTokenizer(d, c.specialTypes())
+	t, err := parseTokenizer(path, conv.specialTokenTypes())
 	if err != nil {
 		return err
 	}
@@ -112,12 +110,14 @@ func Convert(d string, ws io.WriteSeeker) error {
 			t.Vocabulary.Scores = append(t.Vocabulary.Scores, -1)
 			t.Vocabulary.Types = append(t.Vocabulary.Types, tokenTypeUserDefined)
 		}
+	} else {
+		slog.Debug("vocabulary", "size", len(t.Vocabulary.Tokens))
 	}
 
-	ts, err := parseTensors(d)
+	ts, err := parseTensors(path)
 	if err != nil {
 		return err
 	}
 
-	return c.writeFile(ws, c.KV(t), c.Tensors(ts))
+	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
 }

+ 3 - 3
convert/convert_gemma.go

@@ -43,15 +43,15 @@ func (p *gemma) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *gemma) Tensors(ts []Tensor) []*llm.Tensor {
-	var out []*llm.Tensor
+func (p *gemma) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		name := p.tensorName(t.Name())
 		if strings.HasSuffix(name, "_norm.weight") {
 			t.SetRepacker(p.addOne)
 		}
 
-		out = append(out, &llm.Tensor{
+		out = append(out, llm.Tensor{
 			Name:     name,
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),

+ 3 - 3
convert/convert_llama.go

@@ -96,8 +96,8 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *llama) Tensors(ts []Tensor) []*llm.Tensor {
-	var out []*llm.Tensor
+func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
+	var out []llm.Tensor
 	for _, t := range ts {
 		name := p.tensorName(t.Name())
 		if strings.HasSuffix(name, "attn_q.weight") ||
@@ -105,7 +105,7 @@ func (p *llama) Tensors(ts []Tensor) []*llm.Tensor {
 			t.SetRepacker(p.repack)
 		}
 
-		out = append(out, &llm.Tensor{
+		out = append(out, llm.Tensor{
 			Name:     name,
 			Kind:     t.Kind(),
 			Shape:    t.Shape(),

+ 3 - 3
convert/convert_mixtral.go

@@ -31,7 +31,7 @@ func (p *mixtral) KV(t *Tokenizer) llm.KV {
 	return kv
 }
 
-func (p *mixtral) Tensors(ts []Tensor) []*llm.Tensor {
+func (p *mixtral) Tensors(ts []Tensor) []llm.Tensor {
 	oldnew := []string{
 		"model.layers", "blk",
 		"w1", "ffn_gate_exps",
@@ -58,10 +58,10 @@ func (p *mixtral) Tensors(ts []Tensor) []*llm.Tensor {
 		return true
 	})
 
-	var out []*llm.Tensor
+	var out []llm.Tensor
 	for n, e := range experts {
 		// TODO(mxyng): sanity check experts
-		out = append(out, &llm.Tensor{
+		out = append(out, llm.Tensor{
 			Name:     n,
 			Kind:     e[0].Kind(),
 			Shape:    append([]uint64{uint64(len(e))}, e[0].Shape()...),

+ 7 - 2
convert/reader.go

@@ -29,6 +29,11 @@ func (t tensorBase) Shape() []uint64 {
 	return t.shape
 }
 
+const (
+	tensorKindF32 uint32 = iota
+	tensorKindF16
+)
+
 func (t tensorBase) Kind() uint32 {
 	if strings.HasSuffix(t.name, ".block_sparse_moe.gate.weight") {
 		return 0
@@ -38,9 +43,9 @@ func (t tensorBase) Kind() uint32 {
 	case 0:
 		panic("invalid tensor shape")
 	case 1:
-		return 0
+		return tensorKindF32
 	default:
-		return 1
+		return tensorKindF16
 	}
 }
 

+ 3 - 2
convert/reader_safetensors.go

@@ -66,6 +66,7 @@ func parseSafetensors(ps ...string) ([]Tensor, error) {
 	return ts, nil
 }
 
+// safetensorsPad returns the padded size of the safetensors file given a length n and offset s, computed as 8 + n + s
 func safetensorsPad(n, s int64) int64 {
 	return 8 + n + s
 }
@@ -125,9 +126,9 @@ func (st safetensor) WriteTo(w io.Writer) (int64, error) {
 	}
 
 	switch st.Kind() {
-	case 0:
+	case tensorKindF32:
 		return 0, binary.Write(w, binary.LittleEndian, f32s)
-	case 1:
+	case tensorKindF16:
 		f16s := make([]uint16, len(f32s))
 		for i := range f32s {
 			f16s[i] = float16.Fromfloat32(f32s[i]).Bits()

+ 8 - 7
convert/tokenizer.go

@@ -32,7 +32,7 @@ type Tokenizer struct {
 	Template string
 }
 
-func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) {
+func parseTokenizer(d string, specialTokenTypes []string) (*Tokenizer, error) {
 	v, err := parseVocabulary(d)
 	if err != nil {
 		return nil, err
@@ -66,6 +66,8 @@ func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) {
 			switch pt.Type {
 			case "Split":
 				if pt.Pattern.Regex != "" {
+					// create a checksum of all Split pretokenizers which should be sufficient
+					// to identify the pretokenizer
 					sha256sum.Write([]byte(pt.Pattern.Regex))
 				}
 			}
@@ -102,7 +104,7 @@ func parseTokenizer(d string, specialTypes []string) (*Tokenizer, error) {
 			}
 		}
 
-		for _, st := range specialTypes {
+		for _, st := range specialTokenTypes {
 			sv := SpecialVocabulary{Type: st}
 			if bts, ok := p[fmt.Sprintf("add_%s_token", st)]; ok {
 				if err := json.Unmarshal(bts, &sv.AddToken); err != nil {
@@ -224,14 +226,13 @@ func parseVocabulary(d string) (*Vocabulary, error) {
 	}
 
 	for pattern, parseFn := range patterns {
-		matches, err := filepath.Glob(filepath.Join(d, pattern))
-		if err != nil {
+		if _, err := os.Stat(filepath.Join(d, pattern)); errors.Is(err, os.ErrNotExist) {
+			continue
+		} else if err != nil {
 			return nil, err
 		}
 
-		if len(matches) > 0 {
-			return parseFn(d)
-		}
+		return parseFn(d)
 	}
 
 	return nil, errors.New("unknown tensor format")

+ 6 - 11
llm/gguf.go

@@ -489,6 +489,7 @@ func readGGUFArray(llm *gguf, r io.Reader) (*array, error) {
 	return a, nil
 }
 
+// writeGGUFArray writes a slice s of type E to the writer w with a gguf type of t
 func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
 	if err := binary.Write(w, binary.LittleEndian, ggufTypeArray); err != nil {
 		return err
@@ -502,16 +503,10 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
 		return err
 	}
 
-	for _, e := range s {
-		if err := binary.Write(w, binary.LittleEndian, e); err != nil {
-			return err
-		}
-	}
-
-	return nil
+	return binary.Write(w, binary.LittleEndian, s)
 }
 
-func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error {
+func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
 	if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
 		return err
 	}
@@ -537,7 +532,7 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []*Tensor) error {
 		}
 	}
 
-	slices.SortFunc(ts, func(a, b *Tensor) int {
+	slices.SortFunc(ts, func(a, b Tensor) int {
 		var i, j int
 		if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
 			return cmp.Compare(a.Name, b.Name)
@@ -622,7 +617,7 @@ func ggufWriteKV(ws io.WriteSeeker, k string, v any) error {
 	return err
 }
 
-func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error {
+func ggufWriteTensorInfo(ws io.WriteSeeker, t Tensor) error {
 	slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
 	if err := binary.Write(ws, binary.LittleEndian, uint64(len(t.Name))); err != nil {
 		return err
@@ -649,7 +644,7 @@ func ggufWriteTensorInfo(ws io.WriteSeeker, t *Tensor) error {
 	return binary.Write(ws, binary.LittleEndian, t.Offset)
 }
 
-func ggufWriteTensor(ws io.WriteSeeker, t *Tensor, alignment int64) error {
+func ggufWriteTensor(ws io.WriteSeeker, t Tensor, alignment int64) error {
 	offset, err := ws.Seek(0, io.SeekCurrent)
 	if err != nil {
 		return err

+ 1 - 1
llm/memory_test.go

@@ -21,7 +21,7 @@ func TestEstimateGPULayers(t *testing.T) {
 	defer f.Close()
 	inputLayerCount := 5
 
-	tensors := []*Tensor{
+	tensors := []Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "blk.1.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "blk.2.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},

+ 1 - 1
server/routes_create_test.go

@@ -19,7 +19,7 @@ import (
 
 var stream bool = false
 
-func createBinFile(t *testing.T, kv map[string]any, ti []*llm.Tensor) string {
+func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
 	t.Helper()
 
 	f, err := os.CreateTemp(t.TempDir(), "")

+ 4 - 4
server/routes_generate_test.go

@@ -101,7 +101,7 @@ func TestGenerateChat(t *testing.T) {
 			"tokenizer.ggml.tokens":         []string{""},
 			"tokenizer.ggml.scores":         []float32{0},
 			"tokenizer.ggml.token_type":     []int32{0},
-		}, []*llm.Tensor{
+		}, []llm.Tensor{
 			{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
@@ -149,7 +149,7 @@ func TestGenerateChat(t *testing.T) {
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"general.architecture": "bert",
 				"bert.pooling_type":    uint32(0),
-			}, []*llm.Tensor{})),
+			}, []llm.Tensor{})),
 			Stream: &stream,
 		})
 
@@ -399,7 +399,7 @@ func TestGenerate(t *testing.T) {
 			"tokenizer.ggml.tokens":         []string{""},
 			"tokenizer.ggml.scores":         []float32{0},
 			"tokenizer.ggml.token_type":     []int32{0},
-		}, []*llm.Tensor{
+		}, []llm.Tensor{
 			{Name: "token_embd.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.attn_norm.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
 			{Name: "blk.0.ffn_down.weight", Shape: []uint64{1}, WriterTo: bytes.NewReader(make([]byte, 4))},
@@ -447,7 +447,7 @@ func TestGenerate(t *testing.T) {
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"general.architecture": "bert",
 				"bert.pooling_type":    uint32(0),
-			}, []*llm.Tensor{})),
+			}, []llm.Tensor{})),
 			Stream: &stream,
 		})
 

+ 1 - 1
server/sched_test.go

@@ -124,7 +124,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 		"tokenizer.ggml.tokens":         []string{" "},
 		"tokenizer.ggml.scores":         []float32{0},
 		"tokenizer.ggml.token_type":     []int32{0},
-	}, []*llm.Tensor{
+	}, []llm.Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 	}))