Bladeren bron

support raw generation requests (#952)

- add the optional `raw` generate request parameter to bypass prompt formatting and response context
- add raw request to docs
Bruce MacDonald 1 jaar geleden
bovenliggende
commit
ec2a31e9b3
3 gewijzigde bestanden met toevoegingen van 50 en 5 verwijderingen
  1. 1 0
      api/types.go
  2. 31 0
      docs/api.md
  3. 18 5
      server/routes.go

+ 1 - 0
api/types.go

@@ -37,6 +37,7 @@ type GenerateRequest struct {
 	Template string `json:"template"`
 	Template string `json:"template"`
 	Context  []int  `json:"context,omitempty"`
 	Context  []int  `json:"context,omitempty"`
 	Stream   *bool  `json:"stream,omitempty"`
 	Stream   *bool  `json:"stream,omitempty"`
+	Raw      bool   `json:"raw,omitempty"`
 
 
 	Options map[string]interface{} `json:"options"`
 	Options map[string]interface{} `json:"options"`
 }
 }

+ 31 - 0
docs/api.md

@@ -46,6 +46,7 @@ Advanced parameters (optional):
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
 - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
 - `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
 - `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
+- `raw`: if `true`, no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API and are managing history yourself. A `raw` request that also sets `template`, `system`, or `context` is rejected with a 400 error.
 
 
 ### Examples
 ### Examples
 
 
@@ -136,6 +137,36 @@ If `stream` is set to `false`, the response will be a single JSON object:
 }
 }
 ```
 ```
 
 
+#### Request
+
+In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable prompt formatting; no context is returned in the response.
+
+```shell
+curl -X POST http://localhost:11434/api/generate -d '{
+  "model": "mistral",
+  "prompt": "[INST] why is the sky blue? [/INST]",
+  "raw": true,
+  "stream": false
+}'
+```
+
+#### Response
+
+```json
+{
+  "model": "mistral",
+  "created_at": "2023-11-03T15:36:02.583064Z",
+  "response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
+  "done": true,
+  "total_duration": 14648695333,
+  "load_duration": 3302671417,
+  "prompt_eval_count": 14,
+  "prompt_eval_duration": 286243000,
+  "eval_count": 129,
+  "eval_duration": 10931424000
+}
+```
+
 ## Create a Model
 ## Create a Model
 
 
 ```shell
 ```shell

+ 18 - 5
server/routes.go

@@ -158,9 +158,14 @@ func GenerateHandler(c *gin.Context) {
 		return
 		return
 	}
 	}
 
 
-	if req.Model == "" {
+	// validate the request
+	switch {
+	case req.Model == "":
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
 		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
 		return
 		return
+	case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
+		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
+		return
 	}
 	}
 
 
 	model, err := GetModel(req.Model)
 	model, err := GetModel(req.Model)
@@ -189,10 +194,13 @@ func GenerateHandler(c *gin.Context) {
 
 
 	checkpointLoaded := time.Now()
 	checkpointLoaded := time.Now()
 
 
-	prompt, err := model.Prompt(req)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
+	prompt := req.Prompt
+	if !req.Raw {
+		prompt, err = model.Prompt(req)
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
 	}
 	}
 
 
 	ch := make(chan any)
 	ch := make(chan any)
@@ -215,6 +223,11 @@ func GenerateHandler(c *gin.Context) {
 				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 				r.LoadDuration = checkpointLoaded.Sub(checkpointStart)
 			}
 			}
 
 
+			if req.Raw {
+				// in raw mode the client must manage history on their own
+				r.Context = nil
+			}
+
 			ch <- r
 			ch <- r
 		}
 		}