
JSON mode: add `"format": "json"` as an API parameter (#1051)

* add `"format": "json"` as an API parameter
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
Jeffrey Morgan 1 year ago
parent
commit
5cba29b9d6
5 changed files with 97 additions and 9 deletions
  1. api/types.go (+1 -0)
  2. docs/api.md (+58 -6)
  3. llm/llama.go (+33 -1)
  4. llm/llm.go (+1 -1)
  5. server/routes.go (+4 -1)

+ 1 - 0
api/types.go

@@ -38,6 +38,7 @@ type GenerateRequest struct {
 	Context  []int  `json:"context,omitempty"`
 	Stream   *bool  `json:"stream,omitempty"`
 	Raw      bool   `json:"raw,omitempty"`
+	Format   string `json:"format"`
 
 	Options map[string]interface{} `json:"options"`
 }

+ 58 - 6
docs/api.md

@@ -38,6 +38,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
 
 - `model`: (required) the [model name](#model-names)
 - `prompt`: the prompt to generate a response for
+- `format`: the format to return a response in. Currently the only accepted value is `json`
 
 Advanced parameters (optional):
 
@@ -48,13 +49,17 @@ Advanced parameters (optional):
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 - `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
 
+### JSON mode
+
+Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+
 ### Examples
 
 #### Request
 
 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+  "model": "llama2",
   "prompt": "Why is the sky blue?"
 }'
 ```
@@ -65,7 +70,7 @@ A stream of JSON objects is returned:
 
 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
   "created_at": "2023-08-04T08:52:19.385406455-07:00",
   "response": "The",
   "done": false
@@ -89,7 +94,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 
 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
   "created_at": "2023-08-04T19:22:45.499127Z",
   "response": "",
   "context": [1, 2, 3],
@@ -105,7 +110,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
 }
 ```
 
-#### Request
+#### Request (No streaming)
 
 ```shell
 curl -X POST http://localhost:11434/api/generate -d '{
@@ -137,7 +142,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 ```
 
-#### Request
+#### Request (Raw mode)
 
 In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
 
@@ -167,7 +172,54 @@ curl -X POST http://localhost:11434/api/generate -d '{
 }
 ```
 
-#### Request
+#### Request (JSON mode)
+
+```shell
+curl -X POST http://localhost:11434/api/generate -d '{
+  "model": "llama2",
+  "prompt": "What color is the sky at different times of the day? Respond using JSON",
+  "format": "json",
+  "stream": false
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "llama2",
+  "created_at": "2023-11-09T21:07:55.186497Z",
+  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
+  "done": true,
+  "total_duration": 4661289125,
+  "load_duration": 1714434500,
+  "prompt_eval_count": 36,
+  "prompt_eval_duration": 264132000,
+  "eval_count": 75,
+  "eval_duration": 2112149000
+}
+```
+
+The value of `response` will be a string containing JSON similar to:
+
+```json
+{
+  "morning": {
+    "color": "blue"
+  },
+  "noon": {
+    "color": "blue-gray"
+  },
+  "afternoon": {
+    "color": "warm gray"
+  },
+  "evening": {
+    "color": "orange"
+  }
+}
+```
+
+#### Request (With options)
 
 If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
 
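The docs above use curl; the equivalent call from Go looks roughly like the sketch below. This is a minimal illustration, not part of the commit: it assumes an Ollama server on the default `localhost:11434`, a pulled `llama2` model, and declares only the response fields it reads.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// generateResponse declares only the fields this sketch uses; the real API
// response also carries timings, context, and other metadata.
type generateResponse struct {
	Model    string `json:"model"`
	Response string `json:"response"`
	Done     bool   `json:"done"`
}

func main() {
	body, err := json.Marshal(map[string]interface{}{
		"model":  "llama2",
		"prompt": "What color is the sky at different times of the day? Respond using JSON",
		"format": "json", // the new parameter: constrain output to valid JSON
		"stream": false,  // return a single response object instead of a stream
	})
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var out generateResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}

	// `response` is itself a string of JSON, so it can be parsed a second time.
	var parsed map[string]interface{}
	if err := json.Unmarshal([]byte(out.Response), &parsed); err != nil {
		log.Fatal(err)
	}
	fmt.Println(parsed)
}
```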

+ 33 - 1
llm/llama.go

@@ -27,6 +27,34 @@ import (
 	"github.com/jmorganca/ollama/format"
 	"github.com/jmorganca/ollama/format"
 )
 )
 
 
+const jsonGrammar = `
+root   ::= object
+value  ::= object | array | string | number | ("true" | "false" | "null") ws
+
+object ::=
+  "{" ws (
+            string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array  ::=
+  "[" ws (
+            value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
+
+# Optional space: by convention, applied in this grammar after literal chars when allowed
+ws ::= ([ \t\n] ws)?
+`
+
 //go:embed llama.cpp/*/build/*/bin/*
 var llamaCppEmbed embed.FS
 
@@ -497,7 +525,7 @@ type prediction struct {
 
 const maxBufferSize = 512 * format.KiloByte
 
-func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
+func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
 	prevConvo, err := llm.Decode(ctx, prevContext)
 	if err != nil {
 		return err
@@ -532,6 +560,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
 		"stop":              llm.Stop,
 	}
 
+	if format == "json" {
+		request["grammar"] = jsonGrammar
+	}
+
 	// Handling JSON marshaling with special characters unescaped.
 	buffer := &bytes.Buffer{}
 	enc := json.NewEncoder(buffer)
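Read in isolation, the change to `Predict` boils down to attaching the grammar when JSON mode is requested. The helper below is hypothetical (the real code builds the request map inline inside `Predict`, within package `llm`) and shows only a subset of the fields, but it captures the control flow added by this commit.

```go
// buildPredictBody is a hypothetical helper for illustration only; it is not
// part of this commit. It sketches how the llama.cpp server request gains a
// "grammar" field when the caller asked for JSON mode.
func buildPredictBody(prompt, format string) map[string]interface{} {
	request := map[string]interface{}{
		"prompt": prompt, // the fully templated prompt
		// ...the real code also sets sampling options such as "stop" here
	}
	if format == "json" {
		// jsonGrammar is the GBNF grammar constant added above; llama.cpp
		// applies it during sampling so the output stays inside the JSON grammar.
		request["grammar"] = jsonGrammar
	}
	return request
}
```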

+ 1 - 1
llm/llm.go

@@ -14,7 +14,7 @@ import (
 )
 
 type LLM interface {
-	Predict(context.Context, []int, string, func(api.GenerateResponse)) error
+	Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
 	Embedding(context.Context, string) ([]float64, error)
 	Encode(context.Context, string) ([]int, error)
 	Decode(context.Context, []int) (string, error)

+ 4 - 1
server/routes.go

@@ -163,6 +163,9 @@ func GenerateHandler(c *gin.Context) {
 	case req.Model == "":
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
 		return
+	case len(req.Format) > 0 && req.Format != "json":
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
+		return
 	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
 		return
@@ -231,7 +234,7 @@ func GenerateHandler(c *gin.Context) {
 			ch <- r
 		}
 
-		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, fn); err != nil {
+		if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, req.Format, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
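Putting the routing change together: the handler rejects any `format` other than empty or `json`, then passes `req.Format` straight through the `LLM` interface to the runner. The standalone check below is a hypothetical restatement of the new switch case, shown only to make the accepted values explicit; in the real code the check lives inline in `GenerateHandler`.

```go
package server

import "errors"

// checkFormat mirrors the new validation in GenerateHandler: an empty format
// means no constraint, "json" enables grammar-constrained sampling, and any
// other value is rejected with a 400 ("format must be json").
func checkFormat(format string) error {
	if len(format) > 0 && format != "json" {
		return errors.New("format must be json")
	}
	return nil
}
```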