Josh Yan 9 months ago
parent
commit
873f334783
5 changed files with 65 additions and 34 deletions
  1. 5 1
      llm/ggla.go
  2. 13 8
      llm/ggml.go
  3. 23 10
      llm/gguf.go
  4. 16 11
      llm/gguf_test.go
  5. 8 4
      server/model.go

+ 5 - 1
llm/ggla.go

@@ -36,6 +36,7 @@ type ggla struct {
 
 
 	kv      KV
 	kv      KV
 	tensors []*Tensor
 	tensors []*Tensor
+	offset  int64
 }
 }
 
 
 func newGGLA(container *containerGGLA) *ggla {
 func newGGLA(container *containerGGLA) *ggla {
@@ -50,7 +51,10 @@ func (llm *ggla) KV() KV {
 }
 }
 
 
 func (llm *ggla) Tensors() Tensors {
 func (llm *ggla) Tensors() Tensors {
-	return llm.tensors
+	return Tensors{
+		Items:  llm.tensors,
+		Offset: llm.offset,
+	}
 }
 }
 
 
 func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
 func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {

+ 13 - 8
llm/ggml.go

@@ -113,32 +113,37 @@ func (kv KV) ChatTemplate() string {
 }
 }
 
 
 // Tensors type as a slice of pointers to Tensor
 // Tensors type as a slice of pointers to Tensor
-type Tensors []*Tensor
+// type Tensors []*Tensor
+
+type Tensors struct {
+	Items  []*Tensor
+	Offset int64
+}
 
 
 // Implement the Len method
 // Implement the Len method
 func (ts Tensors) Len() int {
 func (ts Tensors) Len() int {
-	return len(ts)
+	return len(ts.Items)
 }
 }
 
 
 // Implement the Swap method
 // Implement the Swap method
 func (ts Tensors) Swap(i, j int) {
 func (ts Tensors) Swap(i, j int) {
-	ts[i], ts[j] = ts[j], ts[i]
+	ts.Items[i], ts.Items[j] = ts.Items[j], ts.Items[i]
 }
 }
 
 
 // Implement the Less method
 // Implement the Less method
 func (ts Tensors) Less(i, j int) bool {
 func (ts Tensors) Less(i, j int) bool {
 	var x, y int
 	var x, y int
-	if n, err := fmt.Sscanf(ts[i].Name, "blk.%d", &x); err != nil || n != 1 {
-		return ts[i].Name < ts[j].Name
-	} else if n, err := fmt.Sscanf(ts[j].Name, "blk.%d", &y); err != nil || n != 1 {
-		return ts[i].Name < ts[j].Name
+	if n, err := fmt.Sscanf(ts.Items[i].Name, "blk.%d", &x); err != nil || n != 1 {
+		return ts.Items[i].Name < ts.Items[j].Name
+	} else if n, err := fmt.Sscanf(ts.Items[j].Name, "blk.%d", &y); err != nil || n != 1 {
+		return ts.Items[i].Name < ts.Items[j].Name
 	}
 	}
 	return x < y
 	return x < y
 }
 }
 
 
 func (ts Tensors) Layers() map[string]Layer {
 func (ts Tensors) Layers() map[string]Layer {
 	layers := make(map[string]Layer)
 	layers := make(map[string]Layer)
-	for _, t := range ts {
+	for _, t := range ts.Items {
 		parts := strings.Split(t.Name, ".")
 		parts := strings.Split(t.Name, ".")
 		if parts[0] == "blk" {
 		if parts[0] == "blk" {
 			// join first and second part, e.g. blk.%d
 			// join first and second part, e.g. blk.%d

+ 23 - 10
llm/gguf.go

@@ -8,6 +8,7 @@ import (
 	"io"
 	"io"
 	"log/slog"
 	"log/slog"
 	"slices"
 	"slices"
+	"sort"
 	"strings"
 	"strings"
 
 
 	"golang.org/x/exp/maps"
 	"golang.org/x/exp/maps"
@@ -91,6 +92,7 @@ type gguf struct {
 
 
 	kv      KV
 	kv      KV
 	tensors []*Tensor
 	tensors []*Tensor
+	offset  int64
 
 
 	parameters uint64
 	parameters uint64
 
 
@@ -113,7 +115,10 @@ func (llm *gguf) KV() KV {
 }
 }
 
 
 func (llm *gguf) Tensors() Tensors {
 func (llm *gguf) Tensors() Tensors {
-	return llm.tensors
+	return Tensors{
+		Items:  llm.tensors,
+		Offset: llm.offset,
+	}
 }
 }
 
 
 func (llm *gguf) numTensor() uint64 {
 func (llm *gguf) numTensor() uint64 {
@@ -242,6 +247,15 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 		alignment = 32
 		alignment = 32
 	}
 	}
 
 
+	offset, err := rs.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return fmt.Errorf("failed to get current offset: %w", err)
+	}
+
+	// Pad the current offset up to the next multiple of alignment
+
+	llm.offset = offset + llm.padding(offset, int64(alignment))
+
 	for _, tensor := range llm.tensors {
 	for _, tensor := range llm.tensors {
 		offset, err := rs.Seek(0, io.SeekCurrent)
 		offset, err := rs.Seek(0, io.SeekCurrent)
 		if err != nil {
 		if err != nil {
@@ -703,7 +717,7 @@ func (gguf) padding(offset, align int64) int64 {
 	return (align - offset%align) % align
 	return (align - offset%align) % align
 }
 }
 
 
-// Reader and WriterTo
+// Reader and WriterTo
 type GGUFWriter struct {
 type GGUFWriter struct {
 	KV
 	KV
 	Tensors
 	Tensors
@@ -739,7 +753,7 @@ func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
 		return 0, err
 		return 0, err
 	}
 	}
 
 
-	if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors))); err != nil {
+	if err := binary.Write(wo, binary.LittleEndian, uint64(len(gguf.Tensors.Items))); err != nil {
 		return 0, err
 		return 0, err
 	}
 	}
 
 
@@ -761,10 +775,10 @@ func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
 			}
 			}
 		}
 		}
 	}
 	}
-	//sort.Sort(gguf.Tensors)
+	sort.Sort(gguf.Tensors)
 
 
 	var s uint64
 	var s uint64
-	for _, t := range gguf.Tensors {
+	for _, t := range gguf.Tensors.Items {
 		t.Offset = s
 		t.Offset = s
 		if err := ggufWriteTensorInfo(wo, t); err != nil {
 		if err := ggufWriteTensorInfo(wo, t); err != nil {
 			return 0, err
 			return 0, err
@@ -773,7 +787,7 @@ func (gguf GGUFWriter) WriteTo(w io.Writer) (int64, error) {
 	}
 	}
 	tensorOffset := wo.offset
 	tensorOffset := wo.offset
 
 
-	for _, t := range gguf.Tensors {
+	for _, t := range gguf.Tensors.Items {
 		if err := ggufWriteTensor(wo, t, wo.offset); err != nil {
 		if err := ggufWriteTensor(wo, t, wo.offset); err != nil {
 			return 0, err
 			return 0, err
 		}
 		}
@@ -810,10 +824,9 @@ func ggufWriteTensorInfo(ws io.Writer, t *Tensor) error {
 
 
 func ggufWriteTensor(ws io.Writer, t *Tensor, offset int) error {
 func ggufWriteTensor(ws io.Writer, t *Tensor, offset int) error {
 	slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
 	slog.Debug(t.Name, "kind", t.Kind, "shape", t.Shape, "offset", t.Offset)
-	fmt.Println(int(ggufPadding(int64(offset), 32)))
-	/* if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil {
+	if err := binary.Write(ws, binary.LittleEndian, bytes.Repeat([]byte{0}, int(ggufPadding(int64(offset), 32)))); err != nil {
 		return err
 		return err
-	} */
+	}
 
 
 	_, err := t.WriteTo(ws)
 	_, err := t.WriteTo(ws)
 	return err
 	return err
@@ -906,5 +919,5 @@ func ggufWriteKV(ws io.Writer, k string, v any) error {
 }
 }
 
 
 func ggufPadding(offset, align int64) int64 {
 func ggufPadding(offset, align int64) int64 {
-	return align - offset%align
+	return (align - offset%align) % align
 }
 }

+ 16 - 11
llm/gguf_test.go

@@ -16,6 +16,7 @@ import (
 func TestGGUFRewrite(t *testing.T) {
 func TestGGUFRewrite(t *testing.T) {
 	tests := []string{
 	tests := []string{
 		"phi3.gguf",
 		"phi3.gguf",
+		"nutiny.gguf",
 	}
 	}
 
 
 	for i := range tests {
 	for i := range tests {
@@ -112,13 +113,13 @@ func compareGGML(n int64, ggml1, ggml2 *GGML, f *os.File, f2 *os.File) (map[stri
 	t1 := ggml1.Tensors()
 	t1 := ggml1.Tensors()
 	t2 := ggml2.Tensors()
 	t2 := ggml2.Tensors()
 
 
-	if len(t1) != len(t2) {
-		diff["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(t1), len(t2))
+	if len(t1.Items) != len(t2.Items) {
+		diff["lenTensors"] = fmt.Sprintf("t1: %d, t2: %d", len(t1.Items), len(t2.Items))
 	}
 	}
 
 
-	for _, tensor := range t1 {
+	for _, tensor := range t1.Items {
 		sha256sum := sha256.New()
 		sha256sum := sha256.New()
-		sr := io.NewSectionReader(f, n+int64(tensor.Offset), int64(tensor.Size()))
+		sr := io.NewSectionReader(f, t1.Offset+int64(tensor.Offset), int64(tensor.Size()))
 		var s int64
 		var s int64
 		s, err := io.Copy(sha256sum, sr)
 		s, err := io.Copy(sha256sum, sr)
 		if err != nil {
 		if err != nil {
@@ -147,10 +148,10 @@ func compareGGML(n int64, ggml1, ggml2 *GGML, f *os.File, f2 *os.File) (map[stri
 	diff["sha"] = fmt.Sprintf("%d", s1)
 	diff["sha"] = fmt.Sprintf("%d", s1)
 	diff2["sha"] = fmt.Sprintf("%d", s2) */
 	diff2["sha"] = fmt.Sprintf("%d", s2) */
 
 
-	for _, tensor := range t2 {
+	for _, tensor := range t2.Items {
 		sha256sum := sha256.New()
 		sha256sum := sha256.New()
 		var s int64
 		var s int64
-		sr := io.NewSectionReader(f2, n+int64(tensor.Offset), int64(tensor.Size()))
+		sr := io.NewSectionReader(f2, t1.Offset+int64(tensor.Offset), int64(tensor.Size()))
 		s, err := io.Copy(sha256sum, sr)
 		s, err := io.Copy(sha256sum, sr)
 		if err != nil {
 		if err != nil {
 			fmt.Println(err)
 			fmt.Println(err)
@@ -173,23 +174,24 @@ func decodeGGML(t *testing.T, f *os.File) (*GGML, int64, error) {
 }
 }
 
 
 func rewriteGGML(t *testing.T, ggml *GGML, temp *os.File, f *os.File) (int64, *GGML, error) {
 func rewriteGGML(t *testing.T, ggml *GGML, temp *os.File, f *os.File) (int64, *GGML, error) {
-	var tensors Tensors
+	var tensors []*Tensor
 
 
 	fmt.Println("11111111111111111111111111111111111111111")
 	fmt.Println("11111111111111111111111111111111111111111")
-	for _, tensor := range ggml.Tensors() {
+	for _, tensor := range ggml.Tensors().Items {
 		shape := make([]uint64, len(tensor.Shape))
 		shape := make([]uint64, len(tensor.Shape))
 		for i := range len(tensor.Shape) {
 		for i := range len(tensor.Shape) {
 			shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
 			shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
 		}
 		}
 
 
-		fmt.Println("tensors", tensor.Name, shape, tensor.Kind, 737414+int64(tensor.Offset))
+		fmt.Println("tensors", tensor.Name, shape, tensor.Kind, tensor.Offset)
+		fmt.Println(ggml.Tensors().Offset)
 		tensors = append(tensors, &Tensor{
 		tensors = append(tensors, &Tensor{
 			Name:  tensor.Name,
 			Name:  tensor.Name,
 			Kind:  tensor.Kind,
 			Kind:  tensor.Kind,
 			Shape: shape,
 			Shape: shape,
 
 
 			WriterTo: TensorWriter{
 			WriterTo: TensorWriter{
-				Reader: io.NewSectionReader(f, 737414+int64(tensor.Offset), int64(tensor.Size())),
+				Reader: io.NewSectionReader(f, ggml.Tensors().Offset+int64(tensor.Offset), int64(tensor.Size())),
 			},
 			},
 		})
 		})
 	}
 	}
@@ -197,7 +199,10 @@ func rewriteGGML(t *testing.T, ggml *GGML, temp *os.File, f *os.File) (int64, *G
 	reader := &GGUFWriter{
 	reader := &GGUFWriter{
 		KV: ggml.KV(),
 		KV: ggml.KV(),
 		// Update .Tensors
 		// Update .Tensors
-		Tensors: tensors,
+		Tensors: Tensors{
+			Items:  tensors,
+			Offset: ggml.Tensors().Offset,
+		},
 	}
 	}
 
 
 	n, err := io.Copy(temp, reader)
 	n, err := io.Copy(temp, reader)

+ 8 - 4
server/model.go

@@ -245,9 +245,10 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 		var reader io.Reader = io.NewSectionReader(file, offset, n)
 		var reader io.Reader = io.NewSectionReader(file, offset, n)
 		if !sort.IsSorted(ggml.Tensors()) {
 		if !sort.IsSorted(ggml.Tensors()) {
 			// create a new Tensors containing Tensors that have a writeTo
 			// create a new Tensors containing Tensors that have a writeTo
-			var tensors llm.Tensors
+			var tensors []*llm.Tensor
+			ggmlTensors := ggml.Tensors()
 
 
-			for _, tensor := range ggml.Tensors() {
+			for _, tensor := range ggmlTensors.Items {
 				shape := make([]uint64, len(tensor.Shape))
 				shape := make([]uint64, len(tensor.Shape))
 				for i := range len(tensor.Shape) {
 				for i := range len(tensor.Shape) {
 					shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
 					shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
@@ -260,7 +261,7 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 
 
 					WriterTo: &llm.TensorWriter{
 					WriterTo: &llm.TensorWriter{
 						// This needs offset + tensors.Offset + int64(tensor.Offset) to be correct
 						// This needs offset + tensors.Offset + int64(tensor.Offset) to be correct
-						Reader: io.NewSectionReader(file, offset + int64(tensor.Offset), int64(tensor.Size())),
+						Reader: io.NewSectionReader(file, offset+ggmlTensors.Offset+int64(tensor.Offset), int64(tensor.Size())),
 					},
 					},
 				})
 				})
 			}
 			}
@@ -268,7 +269,10 @@ func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(ap
 			reader = &llm.GGUFWriter{
 			reader = &llm.GGUFWriter{
 				KV: ggml.KV(),
 				KV: ggml.KV(),
 				// Update .Tensors
 				// Update .Tensors
-				Tensors: tensors,
+				Tensors: llm.Tensors{
+					Items:  tensors,
+					Offset: ggml.Tensors().Offset,
+				},
 			}
 			}
 		}
 		}