瀏覽代碼

use prompt templates

Michael Yang 1 年之前
父節點
當前提交
68e6b4550c

+ 8 - 0
api/types.go

@@ -34,3 +34,11 @@ type GenerateRequest struct {
 type GenerateResponse struct {
 	Response string `json:"response"`
 }
+
+// TokenResponse is the JSON payload written to the client for a generated
+// token. It serializes as {"choices":[{"text":"..."}]}.
+type TokenResponse struct {
+	// Choices holds the generated text fragments, encoded under the "choices" key.
+	Choices []TokenResponseChoice `json:"choices"`
+}
+
+// TokenResponseChoice is a single entry of TokenResponse.Choices.
+type TokenResponseChoice struct {
+	// Text is the text carried by this choice, encoded under the "text" key.
+	Text string `json:"text"`
+}

+ 1 - 0
go.mod

@@ -21,6 +21,7 @@ require (
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.4 // indirect
 	github.com/leodido/go-urn v1.2.4 // indirect
+	github.com/lithammer/fuzzysearch v1.1.8
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect

+ 33 - 0
go.sum

@@ -37,6 +37,8 @@ github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZX
 github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
 github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
 github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
+github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
+github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -68,19 +70,50 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
 github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
 golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
 golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
 golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
 golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=

+ 55 - 4
server/routes.go

@@ -1,25 +1,31 @@
 package server
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
 	"log"
 	"net"
 	"net/http"
+	"path"
 	"runtime"
+	"strings"
+	"text/template"
 
 	"github.com/gin-gonic/gin"
-	llama "github.com/jmorganca/ollama/llama"
+	"github.com/lithammer/fuzzysearch/fuzzy"
 
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/llama"
 )
 
+// templates holds every prompt template matching templates/*.prompt, parsed
+// once at package initialization. The glob pattern is relative, so it is
+// resolved against the process working directory, and template.Must panics
+// if parsing fails or the pattern matches no files.
+var templates = template.Must(template.ParseGlob("templates/*.prompt"))
+
 func generate(c *gin.Context) {
 	// TODO: these should be request parameters
 	gpulayers := 1
 	tokens := 512
 	threads := runtime.NumCPU()
-	// TODO: set prompt from template
 
 	var req api.GenerateRequest
 	if err := c.ShouldBindJSON(&req); err != nil {
@@ -33,6 +39,22 @@ func generate(c *gin.Context) {
 		return
 	}
 
+	templateNames := make([]string, 0, len(templates.Templates()))
+	for _, template := range templates.Templates() {
+		templateNames = append(templateNames, template.Name())
+	}
+
+	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
+	if template := templates.Lookup(match); template != nil {
+		var sb strings.Builder
+		if err := template.Execute(&sb, req); err != nil {
+			fmt.Println("Prompt template failed:", err.Error())
+			return
+		}
+
+		req.Prompt = sb.String()
+	}
+
 	ch := make(chan string)
 
 	go func() {
@@ -47,11 +69,29 @@ func generate(c *gin.Context) {
 	}()
 
 	c.Stream(func(w io.Writer) bool {
-		tok, ok := <-ch
+		token, ok := <-ch
 		if !ok {
 			return false
 		}
-		c.SSEvent("token", tok)
+
+		resp := api.TokenResponse{
+			Choices: []api.TokenResponseChoice{
+				{
+					Text: token,
+				},
+			},
+		}
+
+		bts, err := json.Marshal(resp)
+		if err != nil {
+			return false
+		}
+
+		bts = append(bts, '\n')
+		if _, err := w.Write(bts); err != nil {
+			return false
+		}
+
 		return true
 	})
 }
@@ -94,3 +134,14 @@ func Serve(ln net.Listener) error {
 
 	return s.Serve(ln)
 }
+
+// matchRankOne returns the entry of targets that is closest to source,
+// together with its Levenshtein distance from source (smaller means more
+// similar, 0 means identical).
+//
+// The first target always seeds the result, so an exact match (distance 0)
+// is returned rather than skipped. On a tie the earliest target wins. If
+// targets is empty it returns "" and 0.
+func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
+	for i, target := range targets {
+		// Edit distance is a cost, not a score: keep the smallest one seen.
+		if rank := fuzzy.LevenshteinDistance(source, target); i == 0 || rank < bestRank {
+			bestRank = rank
+			bestMatch = target
+		}
+	}
+
+	return bestMatch, bestRank
+}

+ 1 - 1
templates/alpaca.prompt

@@ -1,7 +1,7 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 ### Response:
 

+ 1 - 1
templates/falcon.prompt

@@ -1,3 +1,3 @@
 A helpful assistant who helps the user with any questions asked.
-User: {{ prompt }}
+User: {{ .Prompt }}
 Assistant:

+ 1 - 1
templates/gpt4.prompt

@@ -1,5 +1,5 @@
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 ### Response:
 

+ 1 - 1
templates/hermes.prompt

@@ -1,5 +1,5 @@
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 ### Response:
 

+ 1 - 1
templates/mpt.prompt

@@ -1,4 +1,4 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request. Be concise. Once the request is completed, include no other text.
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 ### Response:

+ 1 - 1
templates/oasst.prompt

@@ -1 +1 @@
-{{ prompt }}
+{{ .Prompt }}

+ 1 - 1
templates/orca.prompt

@@ -2,6 +2,6 @@
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 
 ### User:
-{{ prompt }}
+{{ .Prompt }}
 
 ### Response:

+ 1 - 1
templates/qlora.prompt

@@ -1,2 +1,2 @@
-### Human: {{ prompt }}
+### Human: {{ .Prompt }}
 ### Assistant:

+ 1 - 1
templates/tulu.prompt

@@ -1,4 +1,4 @@
 
-{{ prompt }}
+{{ .Prompt }}
 
 

+ 1 - 1
templates/ultralm.prompt

@@ -1,2 +1,2 @@
-USER: {{ prompt }}
+USER: {{ .Prompt }}
 ASSISTANT:

+ 1 - 1
templates/vicuna.prompt

@@ -1,4 +1,4 @@
 A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
 
-USER: {{ prompt }}
+USER: {{ .Prompt }}
 ASSISTANT:

+ 1 - 1
templates/wizardcoder.prompt

@@ -1,5 +1,5 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request
 
-### Instruction: {{ prompt }}
+### Instruction: {{ .Prompt }}
 
 ### Response:

+ 1 - 1
templates/wizardlm.prompt

@@ -1,2 +1,2 @@
-{{ prompt }}
+{{ .Prompt }}
 ### Response: