Bläddra i källkod

convert progress

Josh Yan 8 månader sedan
förälder
incheckning
340162fbc3
9 ändrade filer med 52 tillägg och 24 borttagningar
  1. 11 0
      cmd/cmd.go
  2. 17 10
      convert/convert.go
  3. 4 3
      convert/convert_test.go
  4. 6 2
      llm/gguf.go
  5. 1 1
      llm/memory_test.go
  6. 8 4
      server/model.go
  7. 2 2
      server/model_test.go
  8. 1 1
      server/routes_create_test.go
  9. 2 1
      server/sched_test.go

+ 11 - 0
cmd/cmd.go

@@ -124,6 +124,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 	}
 
 	bars := make(map[string]*progress.Bar)
+	var convertSpin *progress.Spinner
 	fn := func(resp api.ProgressResponse) error {
 		if resp.Digest != "" {
 			spinner.Stop()
@@ -136,6 +137,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 			}
 
 			bar.Set(resp.Completed)
+		} else if strings.Contains(resp.Status, "converting") {
+			spinner.Stop()
+
+			if convertSpin != nil {
+				convertSpin.SetMessage(resp.Status)
+			} else {
+				status = resp.Status
+				convertSpin = progress.NewSpinner(resp.Status)
+				p.Add("convert", convertSpin)
+			}
 		} else if status != resp.Status {
 			spinner.Stop()
 

+ 17 - 10
convert/convert.go

@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"strings"
 
+	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
 
@@ -79,12 +80,12 @@ func (ModelParameters) specialTokenTypes() []string {
 	}
 }
 
-func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
-	return llm.WriteGGUF(ws, kv, ts)
+func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error {
+	return llm.WriteGGUF(ws, kv, ts, fn)
 }
 
-func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error {
-	return llm.WriteGGUF(ws, kv, ts)
+func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error {
+	return llm.WriteGGUF(ws, kv, ts, fn)
 }
 
 type ModelConverter interface {
@@ -99,7 +100,7 @@ type ModelConverter interface {
 	// specialTokenTypes returns any special token types the model uses
 	specialTokenTypes() []string
 	// writeFile writes the model to the provided io.WriteSeeker
-	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error
 }
 
 type moreParser interface {
@@ -115,10 +116,10 @@ type AdapterConverter interface {
 	// See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details
 	Replacements() []string
 
-	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error
+	writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error
 }
 
-func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
+func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV, fn func(api.ProgressResponse)) error {
 	bts, err := fs.ReadFile(fsys, "adapter_config.json")
 	if err != nil {
 		return err
@@ -153,14 +154,17 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error {
 		return err
 	}
 
-	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts))
+	fn(api.ProgressResponse{
+		Status: fmt.Sprintf("converting adapter 0%%"),
+	})
+	return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts), fn)
 }
 
 // ConvertModel writes an Ollama-compatible model to the provided io.WriteSeeker based on the
 // configuration and files it finds in the input path.
 // Supported input model formats include safetensors.
 // Supported input tokenizer files include tokenizer.json (preferred) and tokenizer.model.
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
+func ConvertModel(fsys fs.FS, ws io.WriteSeeker, fn func(api.ProgressResponse)) error {
 	bts, err := fs.ReadFile(fsys, "config.json")
 	if err != nil {
 		return err
@@ -224,5 +228,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error {
 		return err
 	}
 
-	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts))
+	fn(api.ProgressResponse{
+		Status: fmt.Sprintf("converting model 0%%"),
+	})
+	return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts), fn)
 }

+ 4 - 3
convert/convert_test.go

@@ -19,6 +19,7 @@ import (
 
 	"golang.org/x/exp/maps"
 
+	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
 )
 
@@ -31,7 +32,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
 	}
 	defer f.Close()
 
-	if err := ConvertModel(fsys, f); err != nil {
+	if err := ConvertModel(fsys, f, func(api.ProgressResponse){}); err != nil {
 		t.Fatal(err)
 	}
 
@@ -150,7 +151,7 @@ func TestConvertInvalidDatatype(t *testing.T) {
 	tempDir := t.TempDir()
 	generateSafetensorTestData(t, tempDir)
 
-	err = ConvertModel(os.DirFS(tempDir), f)
+	err = ConvertModel(os.DirFS(tempDir), f, func(api.ProgressResponse){})
 	if err == nil || err.Error() != "unsupported safetensors model" {
 		t.Errorf("expected error but didn't get one")
 	}
@@ -287,7 +288,7 @@ func TestConvertAdapter(t *testing.T) {
 			tempDir := t.TempDir()
 			generateLoraTestData(t, tempDir)
 
-			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
+			if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV, func(api.ProgressResponse){}); err != nil {
 				t.Fatal(err)
 			}
 

+ 6 - 2
llm/gguf.go

@@ -11,6 +11,7 @@ import (
 	"slices"
 	"strings"
 
+	"github.com/ollama/ollama/api"
 	"golang.org/x/exp/maps"
 )
 
@@ -506,7 +507,7 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) error {
 	return binary.Write(w, binary.LittleEndian, s)
 }
 
-func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
+func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor, fn func(api.ProgressResponse)) error {
 	if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil {
 		return err
 	}
@@ -552,7 +553,10 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error {
 	}
 
 	var alignment int64 = 32
-	for _, t := range ts {
+	for i, t := range ts {
+		fn(api.ProgressResponse{
+			Status: fmt.Sprintf("converting model %d%%", 100*(i+1)/len(ts)),
+		})
 		if err := ggufWriteTensor(ws, t, alignment); err != nil {
 			return err
 		}

+ 1 - 1
llm/memory_test.go

@@ -41,7 +41,7 @@ func TestEstimateGPULayers(t *testing.T) {
 		"tokenizer.ggml.tokens":         []string{" "},
 		"tokenizer.ggml.scores":         []float32{0},
 		"tokenizer.ggml.token_type":     []int32{0},
-	}, tensors)
+	}, tensors, func(api.ProgressResponse){})
 	require.NoError(t, err)
 
 	ggml, err := LoadModel(f.Name(), 0)

+ 8 - 4
server/model.go

@@ -98,7 +98,6 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML
 	}
 	defer os.RemoveAll(p)
 
-	fn(api.ProgressResponse{Status: "converting model"})
 	// TODO(mxyng): this should write directly into a layer
 	// e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
 	t, err := os.CreateTemp(p, "fp16")
@@ -123,13 +122,18 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML
 		if baseModel == nil {
 			return nil, fmt.Errorf("no base model specified for the adapter")
 		}
-
-		if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil {
+		fn(api.ProgressResponse{
+			Status: "converting adapter",
+		})
+		if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV(), fn); err != nil {
 			return nil, err
 		}
 		layerType = "application/vnd.ollama.image.adapter"
 	case "model":
-		if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil {
+		fn(api.ProgressResponse{
+			Status: "converting model",
+		})
+		if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t, fn); err != nil {
 			return nil, err
 		}
 		layerType = "application/vnd.ollama.image.model"

+ 2 - 2
server/model_test.go

@@ -145,7 +145,7 @@ func TestParseFromFileFromLayer(t *testing.T) {
 		t.Fatalf("failed to open file: %v", err)
 	}
 	defer file.Close()
-	if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
+	if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse){}); err != nil {
 		t.Fatalf("failed to write gguf: %v", err)
 	}
 
@@ -197,7 +197,7 @@ func TestParseLayerFromCopy(t *testing.T) {
 	defer file2.Close()
 
 	for range 5 {
-		if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil {
+		if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse){}); err != nil {
 			t.Fatalf("failed to write gguf: %v", err)
 		}
 	}

+ 1 - 1
server/routes_create_test.go

@@ -30,7 +30,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string {
 	}
 	defer f.Close()
 
-	if err := llm.WriteGGUF(f, kv, ti); err != nil {
+	if err := llm.WriteGGUF(f, kv, ti, func(api.ProgressResponse){}); err != nil {
 		t.Fatal(err)
 	}
 

+ 2 - 1
server/sched_test.go

@@ -128,7 +128,8 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 	}, []llm.Tensor{
 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
-	}))
+	}, 
+	func(api.ProgressResponse){}))
 	require.NoError(t, err)
 
 	fname := f.Name()