routes.go

package server

import (
	"embed"
	"encoding/json"
	"errors"
	"io"
	"log"
	"math"
	"net"
	"net/http"
	"os"
	"path"
	"runtime"
	"strings"
	"text/template"

	"github.com/gin-gonic/gin"
	"github.com/lithammer/fuzzysearch/fuzzy"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llama"
)
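// templatesFS embeds the prompt templates shipped with the server; they are
// parsed once at startup and matched to models by file name in generate.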
//go:embed templates/*
var templatesFS embed.FS

var templates = template.Must(template.ParseFS(templatesFS, "templates/*.prompt"))
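// cacheDir returns the directory where models are cached, ~/.ollama.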
func cacheDir() string {
	home, err := os.UserHomeDir()
	if err != nil {
		panic(err)
	}

	return path.Join(home, ".ollama")
}
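// generate handles POST /api/generate: it loads the requested model, renders
// the prompt through the best-matching embedded template, and streams
// predicted tokens back to the client as newline-delimited JSON.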
func generate(c *gin.Context) {
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	// fall back to the package defaults for any options the request omits;
	// binding first means we never unmarshal into the shared default structs
	if req.ModelOptions == nil {
		req.ModelOptions = &api.DefaultModelOptions
	}

	if req.PredictOptions == nil {
		req.PredictOptions = &api.DefaultPredictOptions
	}
	// a model may be referenced by a remote alias, a direct file path, or a
	// bare name resolved inside the cache directory
	if remoteModel, _ := getRemote(req.Model); remoteModel != nil {
		req.Model = remoteModel.FullName()
	}

	if _, err := os.Stat(req.Model); err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			return
		}

		req.Model = path.Join(cacheDir(), "models", req.Model+".bin")
	}
	modelOpts := getModelOpts(req)
	modelOpts.NGPULayers = 1 // hard-code this for now

	model, err := llama.New(req.Model, modelOpts)
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}
	defer model.Free()
	// fuzzy-match the model file name against the embedded prompt templates;
	// the loop variable is named tmpl so it does not shadow the template package
	templateNames := make([]string, 0, len(templates.Templates()))
	for _, tmpl := range templates.Templates() {
		templateNames = append(templateNames, tmpl.Name())
	}

	match, _ := matchRankOne(path.Base(req.Model), templateNames)
	if tmpl := templates.Lookup(match); tmpl != nil {
		var sb strings.Builder
		if err := tmpl.Execute(&sb, req); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		req.Prompt = sb.String()
	}
	// the token callback pushes each predicted token onto a channel, which the
	// HTTP stream below drains; closing the channel ends the response
	ch := make(chan string)
	model.SetTokenCallback(func(token string) bool {
		ch <- token
		return true
	})

	predictOpts := getPredictOpts(req)

	go func() {
		defer close(ch)
		// a predict failure panics here, taking the server down with it
		if _, err := model.Predict(req.Prompt, predictOpts); err != nil {
			panic(err)
		}
	}()
	c.Stream(func(w io.Writer) bool {
		token, ok := <-ch
		if !ok {
			// predict finished and closed the channel; end the stream
			return false
		}

		resp := api.GenerateResponse{
			Response: token,
		}

		bts, err := json.Marshal(resp)
		if err != nil {
			return false
		}

		// newline-delimited JSON: one response object per line
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
			return false
		}

		return true
	})
}
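// Serve installs the HTTP routes on a gin router and serves them on the
// provided listener. A minimal caller might look like the sketch below (the
// listener address is an assumption, not fixed anywhere in this file):
//
//	ln, err := net.Listen("tcp", "127.0.0.1:11434")
//	if err != nil {
//		log.Fatal(err)
//	}
//	log.Fatal(server.Serve(ln))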
func Serve(ln net.Listener) error {
	r := gin.Default()

	r.GET("/", func(c *gin.Context) {
		c.String(http.StatusOK, "Ollama is running")
	})

	// the route path must begin with a slash or gin panics at registration
	r.POST("/api/pull", func(c *gin.Context) {
		var req api.PullRequest
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
			return
		}
		progressCh := make(chan api.PullProgress)
		go func() {
			defer close(progressCh)
			if err := pull(req.Model, progressCh); err != nil {
				var opError *net.OpError
				if errors.As(err, &opError) {
					c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
					return
				}

				c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
				return
			}
		}()
		c.Stream(func(w io.Writer) bool {
			progress, ok := <-progressCh
			if !ok {
				return false
			}

			bts, err := json.Marshal(progress)
			if err != nil {
				return false
			}

			bts = append(bts, '\n')
			if _, err := w.Write(bts); err != nil {
				return false
			}

			return true
		})
	})
	r.POST("/api/generate", generate)

	log.Printf("Listening on %s", ln.Addr())
	s := &http.Server{
		Handler: r,
	}

	return s.Serve(ln)
}
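// matchRankOne returns the target with the smallest Levenshtein distance to
// source, along with that distance (lower is a closer match); it is used to
// pick the prompt template whose name is closest to the model's file name.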
func matchRankOne(source string, targets []string) (bestMatch string, bestRank int) {
	bestRank = math.MaxInt
	for _, target := range targets {
		if rank := fuzzy.LevenshteinDistance(source, target); bestRank > rank {
			bestRank = rank
			bestMatch = target
		}
	}

	return
}
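// getModelOpts copies the request's model options into the llama bindings'
// option struct.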
func getModelOpts(req api.GenerateRequest) llama.ModelOptions {
	return llama.ModelOptions{
		ContextSize: req.ModelOptions.ContextSize,
		Seed:        req.ModelOptions.Seed,
		F16Memory:   req.ModelOptions.F16Memory,
		MLock:       req.ModelOptions.MLock,
		Embeddings:  req.ModelOptions.Embeddings,
		MMap:        req.ModelOptions.MMap,
		LowVRAM:     req.ModelOptions.LowVRAM,
		NBatch:      req.ModelOptions.NBatch,
		VocabOnly:   req.ModelOptions.VocabOnly,
		NUMA:        req.ModelOptions.NUMA,
		NGPULayers:  req.ModelOptions.NGPULayers,
		MainGPU:     req.ModelOptions.MainGPU,
		TensorSplit: req.ModelOptions.TensorSplit,
	}
}
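// getPredictOpts copies the request's prediction options into the llama
// bindings' option struct; a thread count of -1 means "use every CPU".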
func getPredictOpts(req api.GenerateRequest) llama.PredictOptions {
	threads := req.PredictOptions.Threads
	if threads == -1 {
		threads = runtime.NumCPU()
	}

	return llama.PredictOptions{
		Threads:           threads,
		Seed:              req.PredictOptions.Seed,
		Tokens:            req.PredictOptions.Tokens,
		Penalty:           req.PredictOptions.Penalty,
		Repeat:            req.PredictOptions.Repeat,
		Batch:             req.PredictOptions.Batch,
		NKeep:             req.PredictOptions.NKeep,
		TopK:              req.PredictOptions.TopK,
		TopP:              req.PredictOptions.TopP,
		TailFreeSamplingZ: req.PredictOptions.TailFreeSamplingZ,
		TypicalP:          req.PredictOptions.TypicalP,
		Temperature:       req.PredictOptions.Temperature,
		FrequencyPenalty:  req.PredictOptions.FrequencyPenalty,
		PresencePenalty:   req.PredictOptions.PresencePenalty,
		Mirostat:          req.PredictOptions.Mirostat,
		MirostatTAU:       req.PredictOptions.MirostatTAU,
		MirostatETA:       req.PredictOptions.MirostatETA,
		MMap:              req.PredictOptions.MMap,
	}
}