Ver Fonte

use prompt templates

Michael Yang há 1 ano atrás
pai
commit
68e6b4550c

+ 8 - 0
api/types.go

@@ -34,3 +34,11 @@ type GenerateRequest struct {
 type GenerateResponse struct {
 type GenerateResponse struct {
 	Response string `json:"response"`
 	Response string `json:"response"`
 }
 }
+
+// TokenResponse is a JSON-serializable response whose generated text is
+// carried as a list of choices under the "choices" key.
+type TokenResponse struct {
+	Choices []TokenResponseChoice `json:"choices"`
+}
+
+// TokenResponseChoice is a single entry of TokenResponse.Choices; its text
+// is serialized under the "text" key.
+type TokenResponseChoice struct {
+	Text string `json:"text"`
+}

+ 1 - 0
go.mod

@@ -21,6 +21,7 @@ require (
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.4 // indirect
 	github.com/klauspost/cpuid/v2 v2.2.4 // indirect
 	github.com/leodido/go-urn v1.2.4 // indirect
 	github.com/leodido/go-urn v1.2.4 // indirect
+	github.com/lithammer/fuzzysearch v1.1.8
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect

+ 33 - 0
go.sum

@@ -37,6 +37,8 @@ github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZX
 github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
 github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
 github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
 github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
 github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
 github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
+github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4=
+github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -68,19 +70,50 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
 github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
 github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
 github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
 golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
 golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
 golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
 golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
 golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
 golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
 golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
 golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
 golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
 golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=

+ 55 - 4
server/routes.go

@@ -1,25 +1,31 @@
 package server
 package server
 
 
 import (
 import (
+	"encoding/json"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 	"log"
 	"log"
 	"net"
 	"net"
 	"net/http"
 	"net/http"
+	"path"
 	"runtime"
 	"runtime"
+	"strings"
+	"text/template"
 
 
 	"github.com/gin-gonic/gin"
 	"github.com/gin-gonic/gin"
-	llama "github.com/jmorganca/ollama/llama"
+	"github.com/lithammer/fuzzysearch/fuzzy"
 
 
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/llama"
 )
 )
 
 
+// templates holds the prompt templates parsed from templates/*.prompt when
+// the package is initialized. template.Must panics if ParseGlob returns an
+// error (including when no file matches), and the relative pattern is
+// resolved against the process working directory.
+var templates = template.Must(template.ParseGlob("templates/*.prompt"))
+
 func generate(c *gin.Context) {
 func generate(c *gin.Context) {
 	// TODO: these should be request parameters
 	// TODO: these should be request parameters
 	gpulayers := 1
 	gpulayers := 1
 	tokens := 512
 	tokens := 512
 	threads := runtime.NumCPU()
 	threads := runtime.NumCPU()
-	// TODO: set prompt from template
 
 
 	var req api.GenerateRequest
 	var req api.GenerateRequest
 	if err := c.ShouldBindJSON(&req); err != nil {
 	if err := c.ShouldBindJSON(&req); err != nil {
@@ -33,6 +39,22 @@ func generate(c *gin.Context) {
 		return
 		return
 	}
 	}
 
 
+	templateNames := make([]string, 0, len(templates.Templates()))
+	for _, template := range templates.Templates() {
+		templateNames = append(templateNames, template.Name())
+	}
+
+	match, _ := matchRankOne(path.Base(req.Prompt), templateNames)
+	if template := templates.Lookup(match); template != nil {
+		var sb strings.Builder
+		if err := template.Execute(&sb, req); err != nil {
+			fmt.Println("Prompt template failed:", err.Error())
+			return
+		}
+
+		req.Prompt = sb.String()
+	}
+
 	ch := make(chan string)
 	ch := make(chan string)
 
 
 	go func() {
 	go func() {
@@ -47,11 +69,29 @@ func generate(c *gin.Context) {
 	}()
 	}()
 
 
 	c.Stream(func(w io.Writer) bool {
 	c.Stream(func(w io.Writer) bool {
-		tok, ok := <-ch
+		token, ok := <-ch
 		if !ok {
 		if !ok {
 			return false
 			return false
 		}
 		}
-		c.SSEvent("token", tok)
+
+		resp := api.TokenResponse{
+			Choices: []api.TokenResponseChoice{
+				{
+					Text: token,
+				},
+			},
+		}
+
+		bts, err := json.Marshal(resp)
+		if err != nil {
+			return false
+		}
+
+		bts = append(bts, '\n')
+		if _, err := w.Write(bts); err != nil {
+			return false
+		}
+
 		return true
 		return true
 	})
 	})
 }
 }
@@ -94,3 +134,14 @@ func Serve(ln net.Listener) error {
 
 
 	return s.Serve(ln)
 	return s.Serve(ln)
 }
 }
+
+// matchRankOne returns the entry of targets that is closest to source,
+// measured by Levenshtein (edit) distance, together with that distance.
+// A lower distance means a better match. When several targets share the
+// lowest distance the earliest one wins. If targets is empty it returns
+// "" and 0.
+func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
+	for i, target := range targets {
+		// The first target seeds the minimum; afterwards only a strictly
+		// smaller distance replaces the current best. Comparing against the
+		// zero-valued bestRank with "<" would select the worst match instead.
+		if rank := fuzzy.LevenshteinDistance(source, target); i == 0 || rank < bestRank {
+			bestRank = rank
+			bestMatch = target
+		}
+	}
+
+	return bestMatch, bestRank
+}

+ 1 - 1
templates/alpaca.prompt

@@ -1,7 +1,7 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 
 
 ### Instruction:
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 
 ### Response:
 ### Response:
 
 

+ 1 - 1
templates/falcon.prompt

@@ -1,3 +1,3 @@
 A helpful assistant who helps the user with any questions asked.
 A helpful assistant who helps the user with any questions asked.
-User: {{ prompt }}
+User: {{ .Prompt }}
 Assistant:
 Assistant:

+ 1 - 1
templates/gpt4.prompt

@@ -1,5 +1,5 @@
 ### Instruction:
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 
 ### Response:
 ### Response:
 
 

+ 1 - 1
templates/hermes.prompt

@@ -1,5 +1,5 @@
 ### Instruction:
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 
 
 ### Response:
 ### Response:
 
 

+ 1 - 1
templates/mpt.prompt

@@ -1,4 +1,4 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request. Be concise. Once the request is completed, include no other text.
 Below is an instruction that describes a task. Write a response that appropriately completes the request. Be concise. Once the request is completed, include no other text.
 ### Instruction:
 ### Instruction:
-{{ prompt }}
+{{ .Prompt }}
 ### Response:
 ### Response:

+ 1 - 1
templates/oasst.prompt

@@ -1 +1 @@
-{{ prompt }}
+{{ .Prompt }}

+ 1 - 1
templates/orca.prompt

@@ -2,6 +2,6 @@
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 You are an AI assistant that follows instruction extremely well. Help as much as you can.
 
 
 ### User:
 ### User:
-{{ prompt }}
+{{ .Prompt }}
 
 
 ### Response:
 ### Response:

+ 1 - 1
templates/qlora.prompt

@@ -1,2 +1,2 @@
-### Human: {{ prompt }}
+### Human: {{ .Prompt }}
 ### Assistant:
 ### Assistant:

+ 1 - 1
templates/tulu.prompt

@@ -1,4 +1,4 @@
 
 
-{{ prompt }}
+{{ .Prompt }}
 
 
 
 

+ 1 - 1
templates/ultralm.prompt

@@ -1,2 +1,2 @@
-USER: {{ prompt }}
+USER: {{ .Prompt }}
 ASSISTANT:
 ASSISTANT:

+ 1 - 1
templates/vicuna.prompt

@@ -1,4 +1,4 @@
 A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
 A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
 
 
-USER: {{ prompt }}
+USER: {{ .Prompt }}
 ASSISTANT:
 ASSISTANT:

+ 1 - 1
templates/wizardcoder.prompt

@@ -1,5 +1,5 @@
 Below is an instruction that describes a task. Write a response that appropriately completes the request
 Below is an instruction that describes a task. Write a response that appropriately completes the request
 
 
-### Instruction: {{ prompt }}
+### Instruction: {{ .Prompt }}
 
 
 ### Response:
 ### Response:

+ 1 - 1
templates/wizardlm.prompt

@@ -1,2 +1,2 @@
-{{ prompt }}
+{{ .Prompt }}
 ### Response:
 ### Response: