Kaynağa Gözat

Use `--quantize` flag and `quantize` api parameter (#4321)

* rename `--quantization` to `--quantize`

* backwards

* Update api/types.go

Co-authored-by: Michael Yang <mxyng@pm.me>

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
Jeffrey Morgan 11 ay önce
ebeveyn
işleme
6602e793c0
3 değiştirilmiş dosya ile 17 ekleme ve 9 silme
  1. 8 5
      api/types.go
  2. 3 3
      cmd/cmd.go
  3. 6 1
      server/routes.go

+ 8 - 5
api/types.go

@@ -197,14 +197,17 @@ type EmbeddingResponse struct {
 
 // CreateRequest is the request passed to [Client.Create].
 type CreateRequest struct {
-	Model        string `json:"model"`
-	Path         string `json:"path"`
-	Modelfile    string `json:"modelfile"`
-	Stream       *bool  `json:"stream,omitempty"`
-	Quantization string `json:"quantization,omitempty"`
+	Model     string `json:"model"`
+	Path      string `json:"path"`
+	Modelfile string `json:"modelfile"`
+	Stream    *bool  `json:"stream,omitempty"`
+	Quantize  string `json:"quantize,omitempty"`
 
 	// Name is deprecated, see Model
 	Name string `json:"name"`
+
+	// Quantization is deprecated, see Quantize
+	Quantization string `json:"quantization,omitempty"`
 }
 
 // DeleteRequest is the request passed to [Client.Delete].

+ 3 - 3
cmd/cmd.go

@@ -142,9 +142,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return nil
 	}
 
-	quantization, _ := cmd.Flags().GetString("quantization")
+	quantize, _ := cmd.Flags().GetString("quantize")
 
-	request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
+	request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize}
 	if err := client.Create(cmd.Context(), &request, fn); err != nil {
 		return err
 	}
@@ -1051,7 +1051,7 @@ func NewCLI() *cobra.Command {
 	}
 
 	createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
-	createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
+	createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
 
 	showCmd := &cobra.Command{
 		Use:     "show MODEL",

+ 6 - 1
server/routes.go

@@ -554,7 +554,12 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
 		ctx, cancel := context.WithCancel(c.Request.Context())
 		defer cancel()
 
-		if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
+		quantization := req.Quantization
+		if req.Quantize != "" {
+			quantization = req.Quantize
+		}
+
+		if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(quantization), modelfile, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()