
WIP updated routes

ParthSareen committed 4 months ago
commit e679885733
3 changed files with 68 additions and 21 deletions
  1. api/types.go      +0  -8
  2. docs/api.md       +5  -5
  3. server/routes.go  +63 -8

api/types.go (+0 -8)

@@ -297,10 +297,6 @@ type EmbeddingResponse struct {
 type TokenizeRequest struct {
 	Model string `json:"model"`
 	Text  string `json:"text"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // TokenizeResponse is the response from [Client.Tokenize].
@@ -312,10 +308,6 @@ type TokenizeResponse struct {
 type DetokenizeRequest struct {
 	Model  string `json:"model"`
 	Tokens []int  `json:"tokens"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // DetokenizeResponse is the response from [Client.Detokenize].

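With `keep_alive` removed from both request types, a tokenize or detokenize call now carries only the model name and the payload. A minimal sketch of the resulting wire format, with the two structs mirrored locally from the diff above:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Local mirrors of api.TokenizeRequest and api.DetokenizeRequest as they
// stand after this commit; keep_alive is gone because the handlers no
// longer keep a runner resident in memory.
type TokenizeRequest struct {
	Model string `json:"model"`
	Text  string `json:"text"`
}

type DetokenizeRequest struct {
	Model  string `json:"model"`
	Tokens []int  `json:"tokens"`
}

func main() {
	b, _ := json.Marshal(TokenizeRequest{Model: "llama3.2", Text: "Why is the sky blue?"})
	fmt.Println(string(b)) // {"model":"llama3.2","text":"Why is the sky blue?"}
}
```

Clients that still send `keep_alive` won't get an error: encoding/json silently drops unknown fields on decode unless `DisallowUnknownFields` is set.
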
docs/api.md (+5 -5)

@@ -1506,8 +1506,8 @@ POST /api/tokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/tokenize -d '{
-  "model": "llama3.1:8b",
-  "text": "Why the sky is blue?"
+  "model": "llama3.2",
+  "text": "Why is the sky blue?"
 }'
 ```
 
@@ -1538,15 +1538,15 @@ POST /api/detokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/detokenize -d '{
-  "model": "llama3.1:8b",
-  "tokens": [10445,279,13180,374,6437,30]
+  "model": "llama3.2",
+  "tokens": [10445,374,279,13180,6437,30]
 }'
 ```
 
 #### Response
 
 ```json
-{"text":"Why the sky is blue?"}
+{"text":"Why is the sky blue?"}
 ```
 
 

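Taken together, the two endpoints round-trip: the token IDs returned by `/api/tokenize` reproduce the original text when fed to `/api/detokenize`. A hedged end-to-end sketch in Go, assuming a local server on the default port and that the tokenize response carries a `tokens` array (the field name is inferred from the detokenize request shape above; this diff doesn't show the tokenize response body):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// post sends a JSON body to an endpoint and decodes the reply into out.
func post(url string, in, out any) error {
	body, err := json.Marshal(in)
	if err != nil {
		return err
	}
	resp, err := http.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	return json.NewDecoder(resp.Body).Decode(out)
}

func main() {
	var tok struct {
		Tokens []int `json:"tokens"` // assumed field name, see note above
	}
	err := post("http://localhost:11434/api/tokenize",
		map[string]any{"model": "llama3.2", "text": "Why is the sky blue?"}, &tok)
	if err != nil {
		panic(err)
	}

	var detok struct {
		Text string `json:"text"`
	}
	err = post("http://localhost:11434/api/detokenize",
		map[string]any{"model": "llama3.2", "tokens": tok.Tokens}, &detok)
	if err != nil {
		panic(err)
	}
	fmt.Println(detok.Text) // Why is the sky blue?
}
```
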
server/routes.go (+63 -8)

@@ -30,6 +30,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -569,15 +570,43 @@ func (s *Server) TokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for tokenization", http.StatusBadRequest)
+		return
+	}
+
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name %q is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
 	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
 		return
 	}
+	defer llama.FreeModel(model)
 
-	tokens, err := runner.Tokenize(r.Context(), req.Text)
+	// Tokenize the text
+	tokens, err := model.Tokenize(req.Text, false, true)
 	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+		http.Error(w, fmt.Sprintf("failed to tokenize text: %v", err), http.StatusInternalServerError)
 		return
 	}
 
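The heart of the new handler is a vocab-only, memory-mapped load: only the tokenizer vocabulary is read from the model file, no weights, so nothing has to be scheduled onto a runner. The same pattern in isolation, as a sketch that assumes the `llama` package signatures exactly as they appear in the calls above:

```go
package server

import "github.com/ollama/ollama/llama"

// tokenizeLocal mirrors what TokenizeHandler now does: load just the
// vocabulary of a model file and tokenize against it. Signatures are
// assumed from the calls visible in this diff.
func tokenizeLocal(modelPath, text string) ([]int, error) {
	m, err := llama.LoadModelFromFile(modelPath, llama.ModelParams{
		VocabOnly: true, // skip the weights; only the tokenizer is needed
		UseMmap:   true, // map the file rather than reading it into memory
	})
	if err != nil {
		return nil, err
	}
	defer llama.FreeModel(m)

	// addSpecial=false, parseSpecial=true, matching the handler's call
	return m.Tokenize(text, false, true)
}
```

The trade-off versus the old `scheduleRunner` path: every request pays a small, mmap-backed model open, but no runner stays locked in memory afterwards, which is what made the `keep_alive` field superfluous.
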
@@ -611,17 +640,43 @@ func (s *Server) DetokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for detokenization", http.StatusBadRequest)
+		return
+	}
+
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name %q is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
 	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
 		return
 	}
 
-	text, err := runner.Detokenize(r.Context(), req.Tokens)
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
 	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
 		return
 	}
+	defer llama.FreeModel(model)
+
+	var text string
+	for _, token := range req.Tokens {
+		text += model.TokenToPiece(token)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(api.DetokenizeResponse{
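
A side note on the detokenize loop: building `text` with `+=` reallocates the string on every token. An equivalent, allocation-friendlier form with `strings.Builder`, sketched under the assumption that the loaded model's type is `*llama.Model`:

```go
package server

import (
	"strings"

	"github.com/ollama/ollama/llama"
)

// detokenize joins token pieces with a strings.Builder instead of
// repeated string concatenation; behavior matches the handler's loop.
func detokenize(m *llama.Model, tokens []int) string {
	var sb strings.Builder
	for _, t := range tokens {
		sb.WriteString(m.TokenToPiece(t))
	}
	return sb.String()
}
```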