浏览代码

llm: set done reason at server level

No functional change. Many different done reasons can be set at the runner
level, so rather than obscuring them we should return them to the server
process and let it choose what to do with the done reason. This separates
the API concerns from the runner.
Bruce MacDonald 1 月之前
父节点
当前提交
22f2f6e229
共有 3 个文件被更改，包括 10 次插入和 12 次删除
  1. 8 0
      llm/server.go
  2. 1 6
      runner/llamarunner/runner.go
  3. 1 6
      runner/ollamarunner/runner.go

+ 8 - 0
llm/server.go

@@ -796,6 +796,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 			if err := json.Unmarshal(evt, &c); err != nil {
 				return fmt.Errorf("error unmarshalling llm prediction response: %v", err)
 			}
+			// convert internal done reason to one of our standard api format done reasons
+			switch c.DoneReason {
+			case "limit":
+				c.DoneReason = "length"
+			default:
+				c.DoneReason = "stop"
+			}
+
 			switch {
 			case strings.TrimSpace(c.Content) == lastToken:
 				tokenRepeat++

+ 1 - 6
runner/llamarunner/runner.go

@@ -647,14 +647,9 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
 
 				flusher.Flush()
 			} else {
-				// Send the final response
-				doneReason := "stop"
-				if seq.doneReason == "limit" {
-					doneReason = "length"
-				}
 				if err := json.NewEncoder(w).Encode(&llm.CompletionResponse{
 					Done:               true,
-					DoneReason:         doneReason,
+					DoneReason:         seq.doneReason,
 					PromptEvalCount:    seq.numPromptInputs,
 					PromptEvalDuration: seq.startGenerationTime.Sub(seq.startProcessingTime),
 					EvalCount:          seq.numDecoded,

+ 1 - 6
runner/ollamarunner/runner.go

@@ -627,14 +627,9 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
 
 				flusher.Flush()
 			} else {
-				// Send the final response
-				doneReason := "stop"
-				if seq.doneReason == "limit" {
-					doneReason = "length"
-				}
 				if err := json.NewEncoder(w).Encode(&llm.CompletionResponse{
 					Done:               true,
-					DoneReason:         doneReason,
+					DoneReason:         seq.doneReason,
 					PromptEvalCount:    seq.numPromptInputs,
 					PromptEvalDuration: seq.startGenerationTime.Sub(seq.startProcessingTime),
 					EvalCount:          seq.numPredicted,