optional parameter to not stream response (#639)

* update streaming request accept header
* add optional stream param to request bodies
Bruce MacDonald, 1 year ago
parent commit 274d5a5fdf
4 changed files with 94 additions and 18 deletions
  1. api/client.go (+5 -1)
  2. api/types.go (+11 -3)
  3. docs/api.md (+21 -14)
  4. server/routes.go (+57 -0)

api/client.go (+5 -1)

@@ -17,6 +17,10 @@ import (
 	"github.com/jmorganca/ollama/version"
 )
 
+const DefaultHost = "127.0.0.1:11434"
+
+var envHost = os.Getenv("OLLAMA_HOST")
+
 type Client struct {
 	base *url.URL
 	http http.Client
@@ -143,7 +147,7 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
 	}
 
 	request.Header.Set("Content-Type", "application/json")
-	request.Header.Set("Accept", "application/json")
+	request.Header.Set("Accept", "application/x-ndjson")
 	request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
 
 	response, err := c.http.Do(request)
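
The `Accept` header now matches the `application/x-ndjson` content type the server uses for streamed replies: one JSON object per line. As a rough sketch of how a consumer outside this client might read such a stream (the endpoint and field names are assumed from docs/api.md and api/types.go; this is not code from this commit):

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

// generateResponse mirrors just the fields of api.GenerateResponse used here.
type generateResponse struct {
	Response string `json:"response"`
	Done     bool   `json:"done"`
}

func main() {
	body := strings.NewReader(`{"model": "llama2:7b", "prompt": "Why is the sky blue?"}`)
	req, err := http.NewRequest(http.MethodPost, "http://localhost:11434/api/generate", body)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "application/x-ndjson")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Each line of the streamed body is a standalone JSON object.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		var r generateResponse
		if err := json.Unmarshal(scanner.Bytes(), &r); err != nil {
			panic(err)
		}
		fmt.Print(r.Response)
		if r.Done {
			fmt.Println()
		}
	}
	if err := scanner.Err(); err != nil {
		panic(err)
	}
}
```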

api/types.go (+11 -3)

@@ -37,6 +37,7 @@ type GenerateRequest struct {
 	System   string `json:"system"`
 	Template string `json:"template"`
 	Context  []int  `json:"context,omitempty"`
+	Stream   *bool  `json:"stream,omitempty"`
 
 	Options map[string]interface{} `json:"options"`
 }
@@ -53,8 +54,9 @@ type EmbeddingResponse struct {
 }
 
 type CreateRequest struct {
-	Name string `json:"name"`
-	Path string `json:"path"`
+	Name   string `json:"name"`
+	Path   string `json:"path"`
+	Stream *bool  `json:"stream,omitempty"`
 }
 
 type DeleteRequest struct {
@@ -81,6 +83,9 @@ type CopyRequest struct {
 type PullRequest struct {
 	Name     string `json:"name"`
 	Insecure bool   `json:"insecure,omitempty"`
+	Username string `json:"username"`
+	Password string `json:"password"`
+	Stream   *bool  `json:"stream,omitempty"`
 }
 
 type ProgressResponse struct {
@@ -93,6 +98,9 @@ type ProgressResponse struct {
 type PushRequest struct {
 	Name     string `json:"name"`
 	Insecure bool   `json:"insecure,omitempty"`
+	Username string `json:"username"`
+	Password string `json:"password"`
+	Stream   *bool  `json:"stream,omitempty"`
 }
 
 type ListResponse struct {
@@ -113,7 +121,7 @@ type TokenResponse struct {
 type GenerateResponse struct {
 	Model     string    `json:"model"`
 	CreatedAt time.Time `json:"created_at"`
-	Response  string    `json:"response,omitempty"`
+	Response  string    `json:"response"`
 
 	Done    bool  `json:"done"`
 	Context []int `json:"context,omitempty"`
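
`Stream` is a `*bool` rather than a plain `bool` so the field has three states: nil (omitted from the JSON, so the server keeps its streaming default), explicit `false` (single aggregated response), and explicit `true`. A plain `bool` with `omitempty` could not put `false` on the wire. A minimal sketch of the difference, using a hypothetical trimmed-down struct rather than the full `api.GenerateRequest`:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// generateRequest is a trimmed-down stand-in for api.GenerateRequest.
type generateRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
	Stream *bool  `json:"stream,omitempty"`
}

func main() {
	// Stream left nil: the field is omitted and the server default (streaming) applies.
	b, _ := json.Marshal(generateRequest{Model: "llama2:7b", Prompt: "hi"})
	fmt.Println(string(b)) // {"model":"llama2:7b","prompt":"hi"}

	// Stream pointing at false: the field is serialized and streaming is disabled.
	off := false
	b, _ = json.Marshal(generateRequest{Model: "llama2:7b", Prompt: "hi", Stream: &off})
	fmt.Println(string(b)) // {"model":"llama2:7b","prompt":"hi","stream":false}
}
```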

docs/api.md (+21 -14)

@@ -12,7 +12,6 @@
 - [Push a Model](#push-a-model)
 - [Generate Embeddings](#generate-embeddings)
 
-
 ## Conventions
 
 ### Model names
@@ -40,12 +39,13 @@ Generate a response for a given prompt with a provided model. This is a streamin
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
 
-Advanced parameters:
+Advanced parameters (optional):
 
 - `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
 - `system`: system prompt (overrides what is defined in the `Modelfile`)
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
+- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -80,6 +80,7 @@ The final response in the stream also includes additional data about the generat
 - `eval_count`: number of tokens in the response
 - `eval_duration`: time in nanoseconds spent generating the response
 - `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
+- `response`: empty if the response was streamed; if not streamed, this will contain the full response
 
 To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by `10^9` (`eval_duration` is reported in nanoseconds).
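 For example, with illustrative numbers: an `eval_count` of 290 tokens over an `eval_duration` of 4,709,213,558 ns works out to 290 / 4,709,213,558 × 10^9 ≈ 61.6 token/s.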
 
@@ -87,6 +88,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 {
   "model": "llama2:7b",
   "created_at": "2023-08-04T19:22:45.499127Z",
+  "response": "",
   "context": [1, 2, 3],
   "done": true,
   "total_duration": 5589157167,
@@ -112,6 +114,7 @@ Create a model from a [`Modelfile`](./modelfile.md)
 
 - `name`: name of the model to create
 - `path`: path to the Modelfile
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -179,7 +182,7 @@ Show details about a model including modelfile, template, parameters, license, a
 
 ### Request
 
-```shell  
+```shell
 curl http://localhost:11434/api/show -d '{
   "name": "llama2:7b"
 }'
@@ -189,10 +192,10 @@ curl http://localhost:11434/api/show -d '{
 
 ```json
 {
-    "license": "<contents of license block>",
-    "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
-    "parameters": "stop                           [INST]\nstop                           [/INST]\nstop                           <<SYS>>\nstop                           <</SYS>>",
-    "template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
+  "license": "<contents of license block>",
+  "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
+  "parameters": "stop                           [INST]\nstop                           [/INST]\nstop                           <<SYS>>\nstop                           <</SYS>>",
+  "template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
 }
 ```
 
@@ -245,6 +248,7 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 
 - `name`: name of the model to pull
 - `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -275,7 +279,8 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
 ### Parameters
 
 - `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
-- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.  
+- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
+- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
 
 ### Request
 
@@ -290,15 +295,16 @@ curl -X POST http://localhost:11434/api/push -d '{
 Streaming response that starts with:
 
 ```json
-{"status":"retrieving manifest"}
+{ "status": "retrieving manifest" }
 ```
 
 and then:
 
 ```json
 {
-"status":"starting upload","digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
-"total":1928429856
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
 }
 ```
 
@@ -306,9 +312,10 @@ Then there is a series of uploading responses:
 
 ```json
 {
-"status":"starting upload",
-"digest":"sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
-"total":1928429856}
+  "status": "starting upload",
+  "digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
+  "total": 1928429856
+}
 ```
 
 Finally, when the upload is complete:

server/routes.go (+57 -0)

@@ -240,6 +240,23 @@ func GenerateHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		var response api.GenerateResponse
+		generated := ""
+		for resp := range ch {
+			if r, ok := resp.(api.GenerateResponse); ok {
+				generated += r.Response
+				response = r
+			} else {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
+				return
+			}
+		}
+		response.Response = generated
+		c.JSON(http.StatusOK, response)
+		return
+	}
+
 	streamResponse(c, ch)
 }
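
With `stream` set to `false`, the handler above drains the channel, concatenates the partial `Response` fields, and replies with the final object carrying the full text. A hedged sketch of the corresponding client-side call (URL and body fields assumed from docs/api.md; this is not part of the commit):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// "stream": false asks for one aggregated JSON object instead of an NDJSON stream.
	payload := []byte(`{"model": "llama2:7b", "prompt": "Why is the sky blue?", "stream": false}`)
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The single object carries the concatenated text in its "response" field.
	var out struct {
		Response string `json:"response"`
		Done     bool   `json:"done"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Response)
}
```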
 
@@ -309,6 +326,11 @@ func PullModelHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		waitForStream(c, ch)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -336,6 +358,11 @@ func PushModelHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		waitForStream(c, ch)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -363,6 +390,11 @@ func CreateModelHandler(c *gin.Context) {
 		}
 	}()
 
+	if req.Stream != nil && !*req.Stream {
+		waitForStream(c, ch)
+		return
+	}
+
 	streamResponse(c, ch)
 }
 
@@ -603,6 +635,31 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 	return s.Serve(ln)
 }
 
+func waitForStream(c *gin.Context, ch chan interface{}) {
+	c.Header("Content-Type", "application/json")
+	for resp := range ch {
+		switch r := resp.(type) {
+		case api.ProgressResponse:
+			if r.Status == "success" {
+				c.JSON(http.StatusOK, r)
+				return
+			}
+		case gin.H:
+			if errorMsg, ok := r["error"].(string); ok {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
+				return
+			} else {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in progress response"})
+				return
+			}
+		default:
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
+			return
+		}
+	}
+	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
+}
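
`waitForStream` applies the same idea to the progress-based handlers (create, pull, push): it consumes the channel until a `ProgressResponse` with status `"success"` or an error value arrives, then replies with that single JSON object. A simplified, self-contained illustration of the drain-until-terminal loop, with a hypothetical `progress` type standing in for `api.ProgressResponse` and no gin dependency:

```go
package main

import (
	"errors"
	"fmt"
)

// progress is a stand-in for api.ProgressResponse in this illustration.
type progress struct {
	Status string `json:"status"`
}

// waitForDone mirrors the shape of waitForStream: drain the channel and return
// the terminal "success" value, or an error if the channel closes without one.
func waitForDone(ch <-chan progress) (progress, error) {
	for p := range ch {
		if p.Status == "success" {
			return p, nil
		}
	}
	return progress{}, errors.New("unexpected end of progress response")
}

func main() {
	ch := make(chan progress, 3)
	ch <- progress{Status: "pulling manifest"}
	ch <- progress{Status: "verifying sha256 digest"}
	ch <- progress{Status: "success"}
	close(ch)

	p, err := waitForDone(ch)
	if err != nil {
		panic(err)
	}
	fmt.Println(p.Status) // success
}
```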
+
 func streamResponse(c *gin.Context, ch chan any) {
 	c.Header("Content-Type", "application/x-ndjson")
 	c.Stream(func(w io.Writer) bool {