Josh Yan 9 months ago
parent
commit
2fdebffc8d
3 changed files with 106 additions and 2 deletions
  1. 21 0
      llm/ggml.go
  2. 65 1
      llm/gguf.go
  3. 20 1
      server/model.go

+ 21 - 0
llm/ggml.go

@@ -1,6 +1,7 @@
 package llm
 
 import (
+	"cmp"
 	"encoding/binary"
 	"errors"
 	"fmt"
@@ -114,6 +115,26 @@ func (kv KV) ChatTemplate() string {
 
 type Tensors []*Tensor
 
+// Less reports whether the tensor at index i orders before the tensor at
+// index j. When both names scan as "blk.<number>", the two numbers are
+// compared (tensors with the same number are not less than each other);
+// in every other case the full names are compared as strings.
+func (ts Tensors) Less(i, j int) bool {
+	left, right := ts[i], ts[j]
+
+	var leftBlk, rightBlk int
+	if n, err := fmt.Sscanf(left.Name, "blk.%d", &leftBlk); err == nil && n == 1 {
+		if n, err := fmt.Sscanf(right.Name, "blk.%d", &rightBlk); err == nil && n == 1 {
+			return cmp.Less(leftBlk, rightBlk)
+		}
+	}
+
+	// At least one name has no parsable block index: order by name.
+	return cmp.Less(left.Name, right.Name)
+}
+
+func (ts Tensors) Len() int {
+	return len(ts) // number of tensors; with Less and Swap this matches the sort.Interface method set
+}
+
+func (ts Tensors) Swap(i, j int) {
+	var temp Tensor
+	
+}
+
 func (ts Tensors) Layers() map[string]Layer {
 	layers := make(map[string]Layer)
 	for _, t := range ts {

+ 65 - 1
llm/gguf.go

@@ -2,11 +2,15 @@ package llm
 
 import (
 	"bytes"
+	"cmp"
 	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"io"
+	"slices"
 	"strings"
+
+	"golang.org/x/exp/maps"
 )
 
 type containerGGUF struct {
@@ -711,5 +715,65 @@ func (GGUFWriter) Read([]byte) (int, error) {
 }
 
 func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
-	
+	if err := binary.Write(w, binary.LittleEndian, []byte("GGUF")); err != nil {
+		return 0, err
+	}
+
+	if err := binary.Write(w, binary.LittleEndian, uint32(3)); err != nil {
+		return 0, err
+	}
+
+	if err := binary.Write(w, binary.LittleEndian, uint64(len(gguf.T))); err != nil {
+		return 0, err
+	}
+
+	if err := binary.Write(w, binary.LittleEndian, uint64(len(gguf.KV))); err != nil {
+		return 0, err
+	}
+
+	keys := maps.Keys(gguf.KV)
+	slices.Sort(keys)
+
+	for _, key := range keys {
+		if err := ggufWriteKV(w, key, gguf.KV[key]); err != nil {
+			return err
+		}
+	}
+
+	slices.SortFunc(gguf.T, func(a, b *Tensor) int {
+		var i, j int
+		if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
+			return cmp.Compare(a.Name, b.Name)
+		} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
+			return cmp.Compare(a.Name, b.Name)
+		}
+
+		return cmp.Compare(i, j)
+	})
+
+	var s uint64
+	for _, t := range gguf.T {
+		t.Offset = s
+		if err := ggufWriteTensorInfo(w, t); err != nil {
+			return 0, err
+		}
+		s += t.Size()
+	}
+
+	var alignment int64 = 32
+	for _, t := range gguf.T {
+		if err := ggufWriteTensor(w, t, alignment); err != nil {
+			return 0, err
+		}
+	}
+
+	return 0, nil
+}
+
+// ggufWriteTensor is a placeholder for writing one tensor's data with the
+// given alignment. It is not implemented yet and always returns an error so
+// that callers fail loudly instead of emitting an incomplete file.
+func ggufWriteTensor(io.Writer, *Tensor, int64) error {
+	return fmt.Errorf("ggufWriteTensor: not implemented")
+}
+
+// ggufWriteTensorInfo is a placeholder for writing one tensor's info record.
+// It is not implemented yet and always returns an error so that callers fail
+// loudly instead of emitting an incomplete file.
+func ggufWriteTensorInfo(io.Writer, *Tensor) error {
+	return fmt.Errorf("ggufWriteTensorInfo: not implemented")
 }

+ 20 - 1
server/model.go

@@ -3,6 +3,7 @@ package server
 import (
 	"archive/zip"
 	"bytes"
+	"cmp"
 	"context"
 	"errors"
 	"fmt"
@@ -11,6 +12,7 @@ import (
 	"net/http"
 	"os"
 	"path/filepath"
+	"slices"
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/convert"
@@ -241,7 +243,24 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 			mediatype = "application/vnd.ollama.image.projector"
 		}
 
-		layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
+		var reader io.Reader = io.NewSectionReader(file, offset, n)
+		if !slices.IsSortedFunc(ggml.Tensors(), func(a, b *llm.Tensor) int {
+			var i, j int
+			if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
+				return cmp.Compare(a.Name, b.Name)
+			} else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
+				return cmp.Compare(a.Name, b.Name)
+			}
+
+			return cmp.Compare(i, j)
+		}) {
+			reader = &llm.GGUFWriter{
+				KV: ggml.KV(),
+				T:  ggml.Tensors(),
+			}
+		}
+
+		layer, err := NewLayer(reader, mediatype)
 		if err != nil {
 			return nil, err
 		}