Explorar o código

runner.go: Health endpoint comments

The health endpoint needs a little more work to show progress as Ollama
expects but we can at least return the right status and have comments
for the future.
Jesse Gross hai 8 meses
pai
achega
52e88ab7b3
Modificouse 1 ficheiro con 4 adicións e 2 borrados
  1. 4 2
      llama/runner/runner.go

+ 4 - 2
llama/runner/runner.go

@@ -582,7 +582,7 @@ type HealthResponse struct {
 	Progress float32 `json:"progress"`
 }
 
-// TODO (jmorganca): is it safe to do this concurrently with decoding?
+// TODO (jmorganca): is it safe to do this concurrently with updating status?
 func (s *Server) health(w http.ResponseWriter, r *http.Request) {
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(&HealthResponse{
@@ -659,9 +659,11 @@ func main() {
 		batchSize: *batchSize,
 		parallel:  *parallel,
 		seqs:      make([]*Sequence, *parallel),
-		status:    "loading",
+		status:    "loading model",
 	}
 
+	// TODO (jessegross): This should be in a separate goroutine so we can report progress,
+	// otherwise Ollama can timeout for large model loads
 	// load the model
 	llama.BackendInit()
 	params := llama.NewModelParams(*nGpuLayers, *mainGpu, func(progress float32) {