routes.go

package server

import (
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	"runtime"

	"github.com/gin-gonic/gin"
	"github.com/jmorganca/ollama/api"
	llama "github.com/jmorganca/ollama/llama"
)
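
// Serve registers the /api/load, /api/unload, and /api/generate routes on a
// gin router and serves them on the provided listener.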
func Serve(ln net.Listener) error {
	r := gin.Default()

	var l *llama.LLama
	gpulayers := 1
	tokens := 512
	threads := runtime.NumCPU()
	model := "/Users/pdevine/.cache/gpt4all/GPT4All-13B-snoozy.ggmlv3.q4_0.bin"

	// load the model into memory; a failure is printed but not returned to the client
	r.POST("/api/load", func(c *gin.Context) {
		var err error
		l, err = llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
		if err != nil {
			fmt.Println("Loading the model failed:", err.Error())
		}
	})

	r.POST("/api/unload", func(c *gin.Context) {
	})
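
	// Example request (a sketch; assumes api.GenerateRequest maps its Prompt
	// field to a "prompt" JSON key):
	//
	//	POST /api/generate
	//	{"prompt": "Why is the sky blue?"}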
	r.POST("/api/generate", func(c *gin.Context) {
		// TODO: set prompt from template
		var req api.GenerateRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": err.Error()})
			return
		}

		// guard against predicting before a model has been loaded via /api/load
		if l == nil {
			c.JSON(http.StatusBadRequest, gin.H{"message": "no model loaded; call /api/load first"})
			return
		}

		// run prediction in the background, sending each token over a channel
		ch := make(chan string)
		go func() {
			defer close(ch)
			_, err := l.Predict(req.Prompt, llama.Debug, llama.SetTokenCallback(func(token string) bool {
				ch <- token
				return true
			}), llama.SetTokens(tokens), llama.SetThreads(threads), llama.SetTopK(90), llama.SetTopP(0.86), llama.SetStopWords("llama"))
			if err != nil {
				panic(err)
			}
		}()

		// forward each token to the client as a server-sent event
		c.Stream(func(w io.Writer) bool {
			tok, ok := <-ch
			if !ok {
				return false
			}
			c.SSEvent("token", tok)
			return true
		})

		// embeds, err := l.Embeddings(text)
		// if err != nil {
		// 	fmt.Printf("Embeddings: error %s \n", err.Error())
		// }
	})

	log.Printf("Listening on %s", ln.Addr())

	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
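
// Example caller (a minimal sketch, not part of this file; the listen address
// is an assumption):
//
//	ln, err := net.Listen("tcp", "127.0.0.1:11434")
//	if err != nil {
//		log.Fatal(err)
//	}
//	log.Fatal(server.Serve(ln))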