Explorar o código

split model layer into metadata and data layers

Michael Yang hai 1 ano
pai
achega
41ae232e10
Modificáronse 5 ficheiros con 113 adicións e 12 borrados
  1. 12 0
      llm/ggla.go
  2. 1 0
      llm/ggml.go
  3. 9 2
      llm/gguf.go
  4. 84 6
      server/images.go
  5. 7 4
      server/layer.go

+ 12 - 0
llm/ggla.go

@@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
 
 
 type ggla struct {
 type ggla struct {
 	*containerGGLA
 	*containerGGLA
+	offset int64
 
 
 	kv      KV
 	kv      KV
 	tensors []*Tensor
 	tensors []*Tensor
@@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
 	return llm.tensors
 	return llm.tensors
 }
 }
 
 
+func (llm *ggla) Offset() int64 {
+	return llm.offset
+}
+
 func (llm *ggla) decode(rs io.ReadSeeker) error {
 func (llm *ggla) decode(rs io.ReadSeeker) error {
 	var r uint32
 	var r uint32
 	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
 	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
@@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
 	}
 	}
 	llm.kv["alpha"] = alpha
 	llm.kv["alpha"] = alpha
 
 
+	offset, err := rs.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return err
+	}
+
+	llm.offset = offset
+
 	for {
 	for {
 		var dims uint32
 		var dims uint32
 		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
 		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {

+ 1 - 0
llm/ggml.go

@@ -16,6 +16,7 @@ type GGML struct {
 type model interface {
 type model interface {
 	KV() KV
 	KV() KV
 	Tensors() Tensors
 	Tensors() Tensors
+	Offset() int64
 }
 }
 
 
 type KV map[string]any
 type KV map[string]any

+ 9 - 2
llm/gguf.go

@@ -55,7 +55,7 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
 
 
 	model := newGGUF(c)
 	model := newGGUF(c)
 	slog.Debug(fmt.Sprintf("model = %#v", model))
 	slog.Debug(fmt.Sprintf("model = %#v", model))
-	if err := model.Decode(rs); err != nil {
+	if err := model.decode(rs); err != nil {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
@@ -90,6 +90,7 @@ const (
 
 
 type gguf struct {
 type gguf struct {
 	*containerGGUF
 	*containerGGUF
+	offset int64
 
 
 	kv      KV
 	kv      KV
 	tensors []*Tensor
 	tensors []*Tensor
@@ -116,6 +117,10 @@ func (llm *gguf) Tensors() Tensors {
 	return llm.tensors
 	return llm.tensors
 }
 }
 
 
+func (llm *gguf) Offset() int64 {
+	return llm.offset
+}
+
 func (llm *gguf) numTensor() uint64 {
 func (llm *gguf) numTensor() uint64 {
 	switch llm.Version {
 	switch llm.Version {
 	case 1:
 	case 1:
@@ -138,7 +143,7 @@ func (llm *gguf) numKV() uint64 {
 	}
 	}
 }
 }
 
 
-func (llm *gguf) Decode(rs io.ReadSeeker) error {
+func (llm *gguf) decode(rs io.ReadSeeker) error {
 	// decode key-values
 	// decode key-values
 	for i := 0; uint64(i) < llm.numKV(); i++ {
 	for i := 0; uint64(i) < llm.numKV(); i++ {
 		k, err := readGGUFString(llm, rs)
 		k, err := readGGUFString(llm, rs)
@@ -250,6 +255,8 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
 		return err
 		return err
 	}
 	}
 
 
+	llm.offset = offset + padding
+
 	for _, tensor := range llm.tensors {
 	for _, tensor := range llm.tensors {
 		if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
 		if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
 			return err
 			return err

+ 84 - 6
server/images.go

@@ -208,6 +208,14 @@ func GetModel(name string) (*Model, error) {
 
 
 		switch layer.MediaType {
 		switch layer.MediaType {
 		case "application/vnd.ollama.image.model":
 		case "application/vnd.ollama.image.model":
+			model.ModelPath = filename
+			model.ParentModel = layer.From
+		case "application/vnd.ollama.image.model+metadata", "application/vnd.ollama.image.model+data":
+			filename, err = GetBlobsPath(layer.MergeBase)
+			if err != nil {
+				return nil, err
+			}
+
 			model.ModelPath = filename
 			model.ModelPath = filename
 			model.ParentModel = layer.From
 			model.ParentModel = layer.From
 		case "application/vnd.ollama.image.embed":
 		case "application/vnd.ollama.image.embed":
@@ -349,10 +357,7 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
 			}
 			}
 
 
 			for _, baseLayer := range baseLayers {
 			for _, baseLayer := range baseLayers {
-				if quantization != "" &&
-					baseLayer.MediaType == "application/vnd.ollama.image.model" &&
-					baseLayer.GGML != nil &&
-					baseLayer.GGML.Name() == "gguf" {
+				if quantization != "" && baseLayer.MediaType == "application/vnd.ollama.image.model" {
 					ftype, err := llm.ParseFileType(quantization)
 					ftype, err := llm.ParseFileType(quantization)
 					if err != nil {
 					if err != nil {
 						return err
 						return err
@@ -393,6 +398,36 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
 					config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
 					config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(baseLayer.GGML.KV().ParameterCount()))
 					config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
 					config.FileType = cmp.Or(config.FileType, baseLayer.GGML.KV().FileType())
 					config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
 					config.ModelFamilies = append(config.ModelFamilies, baseLayer.GGML.KV().Architecture())
+
+					f, err := baseLayer.Layer.Open()
+					if err != nil {
+						return err
+					}
+					defer f.Close()
+
+					metadata := io.NewSectionReader(f, 0, baseLayer.GGML.Offset())
+					metadataLayer, err := NewLayer(metadata, "application/vnd.ollama.image.model+metadata")
+					if err != nil {
+						return err
+					}
+					metadataLayer.MergeBase = baseLayer.Digest
+
+					layers = append(layers, metadataLayer)
+
+					stat, err := f.Stat()
+					if err != nil {
+						return err
+					}
+
+					data := io.NewSectionReader(f, baseLayer.GGML.Offset(), stat.Size())
+					dataLayer, err := NewLayer(data, "application/vnd.ollama.image.model+data")
+					if err != nil {
+						return err
+					}
+					dataLayer.MergeBase = baseLayer.Digest
+
+					layers = append(layers, dataLayer)
+					continue
 				}
 				}
 
 
 				layers = append(layers, baseLayer.Layer)
 				layers = append(layers, baseLayer.Layer)
@@ -524,8 +559,8 @@ func CreateModel(ctx context.Context, name, modelFileDir, quantization string, c
 	}
 	}
 
 
 	for _, layer := range append(layers, layer) {
 	for _, layer := range append(layers, layer) {
-		if layer.status != "" {
-			fn(api.ProgressResponse{Status: layer.status})
+		if layer.message != "" {
+			fn(api.ProgressResponse{Status: layer.message})
 		}
 		}
 	}
 	}
 
 
@@ -627,6 +662,9 @@ func deleteUnusedLayers(skipModelPath *ModelPath, deleteMap map[string]struct{},
 
 
 		for _, layer := range manifest.Layers {
 		for _, layer := range manifest.Layers {
 			delete(deleteMap, layer.Digest)
 			delete(deleteMap, layer.Digest)
+			if layer.MergeBase != "" {
+				delete(deleteMap, layer.MergeBase)
+			}
 		}
 		}
 
 
 		delete(deleteMap, manifest.Config.Digest)
 		delete(deleteMap, manifest.Config.Digest)
@@ -733,6 +771,9 @@ func DeleteModel(name string) error {
 	deleteMap := make(map[string]struct{})
 	deleteMap := make(map[string]struct{})
 	for _, layer := range manifest.Layers {
 	for _, layer := range manifest.Layers {
 		deleteMap[layer.Digest] = struct{}{}
 		deleteMap[layer.Digest] = struct{}{}
+		if layer.MergeBase != "" {
+			deleteMap[layer.MergeBase] = struct{}{}
+		}
 	}
 	}
 	deleteMap[manifest.Config.Digest] = struct{}{}
 	deleteMap[manifest.Config.Digest] = struct{}{}
 
 
@@ -855,6 +896,43 @@ func PullModel(ctx context.Context, name string, regOpts *registryOptions, fn fu
 	}
 	}
 	delete(deleteMap, manifest.Config.Digest)
 	delete(deleteMap, manifest.Config.Digest)
 
 
+	type mergedLayer struct {
+		Metadata, Data *Layer
+	}
+
+	mergedLayers := make(map[string]mergedLayer)
+	for _, layer := range manifest.Layers {
+		merged := mergedLayers[layer.MergeBase]
+		if layer.MediaType == "application/vnd.ollama.image.model+metadata" {
+			merged.Metadata = layer
+		} else if layer.MediaType == "application/vnd.ollama.image.model+data" {
+			merged.Data = layer
+		} else {
+			continue
+		}
+
+		mergedLayers[layer.MergeBase] = merged
+	}
+
+	for _, mergedLayer := range mergedLayers {
+		fn(api.ProgressResponse{Status: "merging layers"})
+		metadata, err := mergedLayer.Metadata.Open()
+		if err != nil {
+			return err
+		}
+		defer metadata.Close()
+
+		data, err := mergedLayer.Data.Open()
+		if err != nil {
+			return err
+		}
+		defer data.Close()
+
+		if _, err := NewLayer(io.MultiReader(metadata, data), "application/vnd.ollama.image.model"); err != nil {
+			return err
+		}
+	}
+
 	fn(api.ProgressResponse{Status: "verifying sha256 digest"})
 	fn(api.ProgressResponse{Status: "verifying sha256 digest"})
 	for _, layer := range layers {
 	for _, layer := range layers {
 		if err := verifyBlob(layer.Digest); err != nil {
 		if err := verifyBlob(layer.Digest); err != nil {

+ 7 - 4
server/layer.go

@@ -12,7 +12,10 @@ type Layer struct {
 	Digest    string `json:"digest"`
 	Digest    string `json:"digest"`
 	Size      int64  `json:"size"`
 	Size      int64  `json:"size"`
 	From      string `json:"from,omitempty"`
 	From      string `json:"from,omitempty"`
-	status    string
+
+	MergeBase string `json:"merge_base,omitempty"`
+
+	message string
 }
 }
 
 
 func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
 func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
@@ -56,7 +59,7 @@ func NewLayer(r io.Reader, mediatype string) (*Layer, error) {
 		MediaType: mediatype,
 		MediaType: mediatype,
 		Digest:    digest,
 		Digest:    digest,
 		Size:      n,
 		Size:      n,
-		status:    fmt.Sprintf("%s %s", status, digest),
+		message:   fmt.Sprintf("%s %s", status, digest),
 	}, nil
 	}, nil
 }
 }
 
 
@@ -76,11 +79,11 @@ func NewLayerFromLayer(digest, mediatype, from string) (*Layer, error) {
 		Digest:    digest,
 		Digest:    digest,
 		Size:      fi.Size(),
 		Size:      fi.Size(),
 		From:      from,
 		From:      from,
-		status:    fmt.Sprintf("using existing layer %s", digest),
+		message:   fmt.Sprintf("using existing layer %s", digest),
 	}, nil
 	}, nil
 }
 }
 
 
-func (l *Layer) Open() (io.ReadCloser, error) {
+func (l *Layer) Open() (*os.File, error) {
 	blob, err := GetBlobsPath(l.Digest)
 	blob, err := GetBlobsPath(l.Digest)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err