routes.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382
  1. package server
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "io/fs"
  9. "log/slog"
  10. "math"
  11. "net"
  12. "net/http"
  13. "net/netip"
  14. "os"
  15. "os/signal"
  16. "path/filepath"
  17. "strconv"
  18. "strings"
  19. "sync"
  20. "syscall"
  21. "time"
  22. "github.com/gin-contrib/cors"
  23. "github.com/gin-gonic/gin"
  24. "golang.org/x/exp/slices"
  25. "github.com/ollama/ollama/api"
  26. "github.com/ollama/ollama/client/registry"
  27. "github.com/ollama/ollama/gpu"
  28. "github.com/ollama/ollama/llm"
  29. "github.com/ollama/ollama/openai"
  30. "github.com/ollama/ollama/parser"
  31. "github.com/ollama/ollama/types/model"
  32. "github.com/ollama/ollama/version"
  33. )
  // experiments returns the comma-separated entries of the OLLAMA_EXPERIMENT
  // environment variable. Because it is wrapped in sync.OnceValue the variable
  // is read on the first call only; later calls return the same slice.
  var experiments = sync.OnceValue(func() []string {
  	raw := os.Getenv("OLLAMA_EXPERIMENT")
  	return strings.Split(raw, ",")
  })
  37. func useExperiemntal(flag string) bool {
  38. return slices.Contains(experiments(), flag)
  39. }
  40. var mode string = gin.DebugMode
  41. type Server struct {
  42. addr net.Addr
  43. sched *Scheduler
  44. }
  45. func init() {
  46. switch mode {
  47. case gin.DebugMode:
  48. case gin.ReleaseMode:
  49. case gin.TestMode:
  50. default:
  51. mode = gin.DebugMode
  52. }
  53. gin.SetMode(mode)
  54. }
  // defaultSessionDuration is the fallback returned by
  // getDefaultSessionDuration when OLLAMA_KEEP_ALIVE is unset or unparsable.
  var defaultSessionDuration = time.Minute * 5
  56. func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
  57. opts := api.DefaultOptions()
  58. if err := opts.FromMap(model.Options); err != nil {
  59. return api.Options{}, err
  60. }
  61. if err := opts.FromMap(requestOpts); err != nil {
  62. return api.Options{}, err
  63. }
  64. return opts, nil
  65. }
  // isSupportedImageType reports whether the content type sniffed from image by
  // http.DetectContentType is one of image/jpeg, image/jpg or image/png.
  func isSupportedImageType(image []byte) bool {
  	switch http.DetectContentType(image) {
  	case "image/jpeg", "image/jpg", "image/png":
  		return true
  	default:
  		return false
  	}
  }
  71. func (s *Server) GenerateHandler(c *gin.Context) {
  72. checkpointStart := time.Now()
  73. var req api.GenerateRequest
  74. err := c.ShouldBindJSON(&req)
  75. switch {
  76. case errors.Is(err, io.EOF):
  77. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  78. return
  79. case err != nil:
  80. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  81. return
  82. }
  83. // validate the request
  84. switch {
  85. case req.Model == "":
  86. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  87. return
  88. case len(req.Format) > 0 && req.Format != "json":
  89. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
  90. return
  91. case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
  92. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
  93. return
  94. }
  95. for _, img := range req.Images {
  96. if !isSupportedImageType(img) {
  97. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
  98. return
  99. }
  100. }
  101. model, err := GetModel(req.Model)
  102. if err != nil {
  103. var pErr *fs.PathError
  104. if errors.As(err, &pErr) {
  105. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  106. return
  107. }
  108. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  109. return
  110. }
  111. if model.IsEmbedding() {
  112. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support generate"})
  113. return
  114. }
  115. opts, err := modelOptions(model, req.Options)
  116. if err != nil {
  117. if errors.Is(err, api.ErrInvalidOpts) {
  118. c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  119. return
  120. }
  121. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  122. return
  123. }
  124. var sessionDuration time.Duration
  125. if req.KeepAlive == nil {
  126. sessionDuration = getDefaultSessionDuration()
  127. } else {
  128. sessionDuration = req.KeepAlive.Duration
  129. }
  130. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  131. var runner *runnerRef
  132. select {
  133. case runner = <-rCh:
  134. case err = <-eCh:
  135. if errors.Is(err, context.Canceled) {
  136. c.JSON(499, gin.H{"error": "request canceled"})
  137. return
  138. }
  139. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  140. return
  141. }
  142. // an empty request loads the model
  143. // note: for a short while template was used in lieu
  144. // of `raw` mode so we need to check for it too
  145. if req.Prompt == "" && req.Template == "" && req.System == "" {
  146. c.JSON(http.StatusOK, api.GenerateResponse{
  147. CreatedAt: time.Now().UTC(),
  148. Model: req.Model,
  149. Done: true,
  150. })
  151. return
  152. }
  153. checkpointLoaded := time.Now()
  154. var prompt string
  155. switch {
  156. case req.Raw:
  157. prompt = req.Prompt
  158. case req.Prompt != "":
  159. if req.Template == "" {
  160. req.Template = model.Template
  161. }
  162. if req.System == "" {
  163. req.System = model.System
  164. }
  165. slog.Debug("generate handler", "prompt", req.Prompt)
  166. slog.Debug("generate handler", "template", req.Template)
  167. slog.Debug("generate handler", "system", req.System)
  168. var sb strings.Builder
  169. for i := range req.Images {
  170. fmt.Fprintf(&sb, "[img-%d] ", i)
  171. }
  172. sb.WriteString(req.Prompt)
  173. p, err := Prompt(req.Template, req.System, sb.String(), "", true)
  174. if err != nil {
  175. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  176. return
  177. }
  178. sb.Reset()
  179. if req.Context != nil {
  180. prev, err := runner.llama.Detokenize(c.Request.Context(), req.Context)
  181. if err != nil {
  182. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  183. return
  184. }
  185. sb.WriteString(prev)
  186. }
  187. sb.WriteString(p)
  188. prompt = sb.String()
  189. }
  190. slog.Debug("generate handler", "prompt", prompt)
  191. ch := make(chan any)
  192. var generated strings.Builder
  193. go func() {
  194. defer close(ch)
  195. fn := func(r llm.CompletionResponse) {
  196. // Build up the full response
  197. if _, err := generated.WriteString(r.Content); err != nil {
  198. ch <- gin.H{"error": err.Error()}
  199. return
  200. }
  201. resp := api.GenerateResponse{
  202. Model: req.Model,
  203. CreatedAt: time.Now().UTC(),
  204. Done: r.Done,
  205. Response: r.Content,
  206. Metrics: api.Metrics{
  207. PromptEvalCount: r.PromptEvalCount,
  208. PromptEvalDuration: r.PromptEvalDuration,
  209. EvalCount: r.EvalCount,
  210. EvalDuration: r.EvalDuration,
  211. },
  212. }
  213. if r.Done {
  214. resp.TotalDuration = time.Since(checkpointStart)
  215. resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
  216. if !req.Raw {
  217. p, err := Prompt(req.Template, req.System, req.Prompt, generated.String(), false)
  218. if err != nil {
  219. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  220. return
  221. }
  222. // TODO (jmorganca): encode() should not strip special tokens
  223. tokens, err := runner.llama.Tokenize(c.Request.Context(), p)
  224. if err != nil {
  225. ch <- gin.H{"error": err.Error()}
  226. return
  227. }
  228. resp.Context = append(req.Context, tokens...)
  229. }
  230. }
  231. ch <- resp
  232. }
  233. var images []llm.ImageData
  234. for i := range req.Images {
  235. images = append(images, llm.ImageData{
  236. ID: i,
  237. Data: req.Images[i],
  238. })
  239. }
  240. // Start prediction
  241. req := llm.CompletionRequest{
  242. Prompt: prompt,
  243. Format: req.Format,
  244. Images: images,
  245. Options: opts,
  246. }
  247. if err := runner.llama.Completion(c.Request.Context(), req, fn); err != nil {
  248. ch <- gin.H{"error": err.Error()}
  249. }
  250. }()
  251. if req.Stream != nil && !*req.Stream {
  252. // Accumulate responses into the final response
  253. var final api.GenerateResponse
  254. var sb strings.Builder
  255. for resp := range ch {
  256. switch r := resp.(type) {
  257. case api.GenerateResponse:
  258. sb.WriteString(r.Response)
  259. final = r
  260. case gin.H:
  261. if errorMsg, ok := r["error"].(string); ok {
  262. c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
  263. return
  264. } else {
  265. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
  266. return
  267. }
  268. default:
  269. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
  270. return
  271. }
  272. }
  273. final.Response = sb.String()
  274. c.JSON(http.StatusOK, final)
  275. return
  276. }
  277. streamResponse(c, ch)
  278. }
  279. func getDefaultSessionDuration() time.Duration {
  280. if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
  281. v, err := strconv.Atoi(t)
  282. if err != nil {
  283. d, err := time.ParseDuration(t)
  284. if err != nil {
  285. return defaultSessionDuration
  286. }
  287. if d < 0 {
  288. return time.Duration(math.MaxInt64)
  289. }
  290. return d
  291. }
  292. d := time.Duration(v) * time.Second
  293. if d < 0 {
  294. return time.Duration(math.MaxInt64)
  295. }
  296. return d
  297. }
  298. return defaultSessionDuration
  299. }
  300. func (s *Server) EmbeddingsHandler(c *gin.Context) {
  301. var req api.EmbeddingRequest
  302. err := c.ShouldBindJSON(&req)
  303. switch {
  304. case errors.Is(err, io.EOF):
  305. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  306. return
  307. case err != nil:
  308. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  309. return
  310. }
  311. if req.Model == "" {
  312. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  313. return
  314. }
  315. model, err := GetModel(req.Model)
  316. if err != nil {
  317. var pErr *fs.PathError
  318. if errors.As(err, &pErr) {
  319. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  320. return
  321. }
  322. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  323. return
  324. }
  325. opts, err := modelOptions(model, req.Options)
  326. if err != nil {
  327. if errors.Is(err, api.ErrInvalidOpts) {
  328. c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  329. return
  330. }
  331. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  332. return
  333. }
  334. var sessionDuration time.Duration
  335. if req.KeepAlive == nil {
  336. sessionDuration = getDefaultSessionDuration()
  337. } else {
  338. sessionDuration = req.KeepAlive.Duration
  339. }
  340. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  341. var runner *runnerRef
  342. select {
  343. case runner = <-rCh:
  344. case err = <-eCh:
  345. if errors.Is(err, context.Canceled) {
  346. c.JSON(499, gin.H{"error": "request canceled"})
  347. return
  348. }
  349. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  350. return
  351. }
  352. // an empty request loads the model
  353. if req.Prompt == "" {
  354. c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
  355. return
  356. }
  357. embedding, err := runner.llama.Embedding(c.Request.Context(), req.Prompt)
  358. if err != nil {
  359. slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
  360. c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
  361. return
  362. }
  363. resp := api.EmbeddingResponse{
  364. Embedding: embedding,
  365. }
  366. c.JSON(http.StatusOK, resp)
  367. }
  368. func (s *Server) PullModelHandler(c *gin.Context) {
  369. var req api.PullRequest
  370. err := c.ShouldBindJSON(&req)
  371. switch {
  372. case errors.Is(err, io.EOF):
  373. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  374. return
  375. case err != nil:
  376. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  377. return
  378. }
  379. var model string
  380. if req.Model != "" {
  381. model = req.Model
  382. } else if req.Name != "" {
  383. model = req.Name
  384. } else {
  385. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  386. return
  387. }
  388. if useExperiemntal("pull") {
  389. rc := &registry.Client{
  390. BaseURL: os.Getenv("OLLAMA_REGISTRY_BASE_URL"),
  391. }
  392. modelsDir, err := modelsDir()
  393. if err != nil {
  394. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  395. return
  396. }
  397. cache := &cache{dir: modelsDir}
  398. // TODO(bmizerany): progress updates
  399. if err := rc.Pull(c.Request.Context(), cache, model); err != nil {
  400. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  401. return
  402. }
  403. return
  404. }
  405. ch := make(chan any)
  406. go func() {
  407. defer close(ch)
  408. fn := func(r api.ProgressResponse) {
  409. ch <- r
  410. }
  411. regOpts := &registryOptions{
  412. Insecure: req.Insecure,
  413. }
  414. ctx, cancel := context.WithCancel(c.Request.Context())
  415. defer cancel()
  416. if err := PullModel(ctx, model, regOpts, fn); err != nil {
  417. ch <- gin.H{"error": err.Error()}
  418. }
  419. }()
  420. if req.Stream != nil && !*req.Stream {
  421. waitForStream(c, ch)
  422. return
  423. }
  424. streamResponse(c, ch)
  425. }
  426. func (s *Server) PushModelHandler(c *gin.Context) {
  427. var req api.PushRequest
  428. err := c.ShouldBindJSON(&req)
  429. switch {
  430. case errors.Is(err, io.EOF):
  431. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  432. return
  433. case err != nil:
  434. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  435. return
  436. }
  437. var model string
  438. if req.Model != "" {
  439. model = req.Model
  440. } else if req.Name != "" {
  441. model = req.Name
  442. } else {
  443. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  444. return
  445. }
  446. ch := make(chan any)
  447. go func() {
  448. defer close(ch)
  449. fn := func(r api.ProgressResponse) {
  450. ch <- r
  451. }
  452. regOpts := &registryOptions{
  453. Insecure: req.Insecure,
  454. }
  455. ctx, cancel := context.WithCancel(c.Request.Context())
  456. defer cancel()
  457. if err := PushModel(ctx, model, regOpts, fn); err != nil {
  458. ch <- gin.H{"error": err.Error()}
  459. }
  460. }()
  461. if req.Stream != nil && !*req.Stream {
  462. waitForStream(c, ch)
  463. return
  464. }
  465. streamResponse(c, ch)
  466. }
  467. func (s *Server) CreateModelHandler(c *gin.Context) {
  468. var req api.CreateRequest
  469. err := c.ShouldBindJSON(&req)
  470. switch {
  471. case errors.Is(err, io.EOF):
  472. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  473. return
  474. case err != nil:
  475. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  476. return
  477. }
  478. var model string
  479. if req.Model != "" {
  480. model = req.Model
  481. } else if req.Name != "" {
  482. model = req.Name
  483. } else {
  484. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  485. return
  486. }
  487. if err := ParseModelPath(model).Validate(); err != nil {
  488. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  489. return
  490. }
  491. if req.Path == "" && req.Modelfile == "" {
  492. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
  493. return
  494. }
  495. var modelfile io.Reader = strings.NewReader(req.Modelfile)
  496. if req.Path != "" && req.Modelfile == "" {
  497. mf, err := os.Open(req.Path)
  498. if err != nil {
  499. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
  500. return
  501. }
  502. defer mf.Close()
  503. modelfile = mf
  504. }
  505. commands, err := parser.Parse(modelfile)
  506. if err != nil {
  507. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  508. return
  509. }
  510. ch := make(chan any)
  511. go func() {
  512. defer close(ch)
  513. fn := func(resp api.ProgressResponse) {
  514. ch <- resp
  515. }
  516. ctx, cancel := context.WithCancel(c.Request.Context())
  517. defer cancel()
  518. if err := CreateModel(ctx, model, filepath.Dir(req.Path), req.Quantization, commands, fn); err != nil {
  519. ch <- gin.H{"error": err.Error()}
  520. }
  521. }()
  522. if req.Stream != nil && !*req.Stream {
  523. waitForStream(c, ch)
  524. return
  525. }
  526. streamResponse(c, ch)
  527. }
  528. func (s *Server) DeleteModelHandler(c *gin.Context) {
  529. var req api.DeleteRequest
  530. err := c.ShouldBindJSON(&req)
  531. switch {
  532. case errors.Is(err, io.EOF):
  533. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  534. return
  535. case err != nil:
  536. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  537. return
  538. }
  539. var model string
  540. if req.Model != "" {
  541. model = req.Model
  542. } else if req.Name != "" {
  543. model = req.Name
  544. } else {
  545. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  546. return
  547. }
  548. if err := DeleteModel(model); err != nil {
  549. if os.IsNotExist(err) {
  550. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", model)})
  551. } else {
  552. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  553. }
  554. return
  555. }
  556. manifestsPath, err := GetManifestPath()
  557. if err != nil {
  558. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  559. return
  560. }
  561. if err := PruneDirectory(manifestsPath); err != nil {
  562. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  563. return
  564. }
  565. c.JSON(http.StatusOK, nil)
  566. }
  567. func (s *Server) ShowModelHandler(c *gin.Context) {
  568. var req api.ShowRequest
  569. err := c.ShouldBindJSON(&req)
  570. switch {
  571. case errors.Is(err, io.EOF):
  572. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  573. return
  574. case err != nil:
  575. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  576. return
  577. }
  578. if req.Model != "" {
  579. // noop
  580. } else if req.Name != "" {
  581. req.Model = req.Name
  582. } else {
  583. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  584. return
  585. }
  586. resp, err := GetModelInfo(req)
  587. if err != nil {
  588. if os.IsNotExist(err) {
  589. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
  590. } else {
  591. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  592. }
  593. return
  594. }
  595. c.JSON(http.StatusOK, resp)
  596. }
  597. func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
  598. model, err := GetModel(req.Model)
  599. if err != nil {
  600. return nil, err
  601. }
  602. modelDetails := api.ModelDetails{
  603. ParentModel: model.ParentModel,
  604. Format: model.Config.ModelFormat,
  605. Family: model.Config.ModelFamily,
  606. Families: model.Config.ModelFamilies,
  607. ParameterSize: model.Config.ModelType,
  608. QuantizationLevel: model.Config.FileType,
  609. }
  610. if req.System != "" {
  611. model.System = req.System
  612. }
  613. if req.Template != "" {
  614. model.Template = req.Template
  615. }
  616. msgs := make([]api.Message, 0)
  617. for _, msg := range model.Messages {
  618. msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content})
  619. }
  620. resp := &api.ShowResponse{
  621. License: strings.Join(model.License, "\n"),
  622. System: model.System,
  623. Template: model.Template,
  624. Details: modelDetails,
  625. Messages: msgs,
  626. }
  627. var params []string
  628. cs := 30
  629. for k, v := range model.Options {
  630. switch val := v.(type) {
  631. case []interface{}:
  632. for _, nv := range val {
  633. params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
  634. }
  635. default:
  636. params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
  637. }
  638. }
  639. resp.Parameters = strings.Join(params, "\n")
  640. for k, v := range req.Options {
  641. if _, ok := req.Options[k]; ok {
  642. model.Options[k] = v
  643. }
  644. }
  645. mf, err := ShowModelfile(model)
  646. if err != nil {
  647. return nil, err
  648. }
  649. resp.Modelfile = mf
  650. return resp, nil
  651. }
  652. func (s *Server) ListModelsHandler(c *gin.Context) {
  653. models := make([]api.ModelResponse, 0)
  654. manifestsPath, err := GetManifestPath()
  655. if err != nil {
  656. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  657. return
  658. }
  659. modelResponse := func(modelName string) (api.ModelResponse, error) {
  660. model, err := GetModel(modelName)
  661. if err != nil {
  662. return api.ModelResponse{}, err
  663. }
  664. modelDetails := api.ModelDetails{
  665. Format: model.Config.ModelFormat,
  666. Family: model.Config.ModelFamily,
  667. Families: model.Config.ModelFamilies,
  668. ParameterSize: model.Config.ModelType,
  669. QuantizationLevel: model.Config.FileType,
  670. }
  671. return api.ModelResponse{
  672. Model: model.ShortName,
  673. Name: model.ShortName,
  674. Size: model.Size,
  675. Digest: model.Digest,
  676. Details: modelDetails,
  677. }, nil
  678. }
  679. walkFunc := func(path string, info os.FileInfo, _ error) error {
  680. if !info.IsDir() {
  681. path, tag := filepath.Split(path)
  682. model := strings.Trim(strings.TrimPrefix(path, manifestsPath), string(os.PathSeparator))
  683. modelPath := strings.Join([]string{model, tag}, ":")
  684. canonicalModelPath := strings.ReplaceAll(modelPath, string(os.PathSeparator), "/")
  685. resp, err := modelResponse(canonicalModelPath)
  686. if err != nil {
  687. slog.Info(fmt.Sprintf("skipping file: %s", canonicalModelPath))
  688. // nolint: nilerr
  689. return nil
  690. }
  691. resp.ModifiedAt = info.ModTime()
  692. models = append(models, resp)
  693. }
  694. return nil
  695. }
  696. if err := filepath.Walk(manifestsPath, walkFunc); err != nil {
  697. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  698. return
  699. }
  700. c.JSON(http.StatusOK, api.ListResponse{Models: models})
  701. }
  702. func (s *Server) CopyModelHandler(c *gin.Context) {
  703. var r api.CopyRequest
  704. if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
  705. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  706. return
  707. } else if err != nil {
  708. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  709. return
  710. }
  711. src := model.ParseName(r.Source)
  712. if !src.IsValid() {
  713. _ = c.Error(fmt.Errorf("source %q is invalid", r.Source))
  714. }
  715. dst := model.ParseName(r.Destination)
  716. if !dst.IsValid() {
  717. _ = c.Error(fmt.Errorf("destination %q is invalid", r.Destination))
  718. }
  719. if len(c.Errors) > 0 {
  720. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": c.Errors.Errors()})
  721. return
  722. }
  723. if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
  724. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
  725. } else if err != nil {
  726. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  727. }
  728. }
  729. func (s *Server) HeadBlobHandler(c *gin.Context) {
  730. path, err := GetBlobsPath(c.Param("digest"))
  731. if err != nil {
  732. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  733. return
  734. }
  735. if _, err := os.Stat(path); err != nil {
  736. c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
  737. return
  738. }
  739. c.Status(http.StatusOK)
  740. }
  741. func (s *Server) CreateBlobHandler(c *gin.Context) {
  742. path, err := GetBlobsPath(c.Param("digest"))
  743. if err != nil {
  744. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  745. return
  746. }
  747. _, err = os.Stat(path)
  748. switch {
  749. case errors.Is(err, os.ErrNotExist):
  750. // noop
  751. case err != nil:
  752. c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  753. return
  754. default:
  755. c.Status(http.StatusOK)
  756. return
  757. }
  758. layer, err := NewLayer(c.Request.Body, "")
  759. if err != nil {
  760. c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  761. return
  762. }
  763. if layer.Digest != c.Param("digest") {
  764. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
  765. return
  766. }
  767. if _, err := layer.Commit(); err != nil {
  768. c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  769. return
  770. }
  771. c.Status(http.StatusCreated)
  772. }
  // defaultAllowOrigins lists the hosts that GenerateRoutes always adds to the
  // CORS allow-list, each as http and https, with and without a port wildcard.
  var defaultAllowOrigins = []string{"localhost", "127.0.0.1", "0.0.0.0"}
  // isLocalIP reports whether ip is assigned to one of this machine's network
  // interfaces. Addresses are compared by their string form. Interfaces whose
  // addresses cannot be listed, and addresses that do not parse as CIDR, are
  // skipped; if the interfaces cannot be enumerated at all the result is false.
  func isLocalIP(ip netip.Addr) bool {
  	ifaces, err := net.Interfaces()
  	if err != nil {
  		return false
  	}

  	for _, iface := range ifaces {
  		addrs, err := iface.Addrs()
  		if err != nil {
  			continue
  		}

  		for _, a := range addrs {
  			parsed, _, err := net.ParseCIDR(a.String())
  			if err != nil {
  				continue
  			}
  			if parsed.String() == ip.String() {
  				return true
  			}
  		}
  	}

  	return false
  }
  // allowedHost reports whether host names this machine or a local-only domain.
  //
  // It accepts the empty string, "localhost", the machine's own hostname as
  // returned by os.Hostname, and any name ending in ".localhost", ".local" or
  // ".internal". DNS names are case-insensitive, so all comparisons are made on
  // the lower-cased form.
  func allowedHost(host string) bool {
  	host = strings.ToLower(host)

  	if host == "" || host == "localhost" {
  		return true
  	}

  	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
  		return true
  	}

  	tlds := []string{
  		"localhost",
  		"local",
  		"internal",
  	}

  	// check if the host is a local TLD
  	for _, tld := range tlds {
  		if strings.HasSuffix(host, "."+tld) {
  			return true
  		}
  	}

  	return false
  }
  816. func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
  817. return func(c *gin.Context) {
  818. if addr == nil {
  819. c.Next()
  820. return
  821. }
  822. if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
  823. c.Next()
  824. return
  825. }
  826. host, _, err := net.SplitHostPort(c.Request.Host)
  827. if err != nil {
  828. host = c.Request.Host
  829. }
  830. if addr, err := netip.ParseAddr(host); err == nil {
  831. if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
  832. c.Next()
  833. return
  834. }
  835. }
  836. if allowedHost(host) {
  837. c.Next()
  838. return
  839. }
  840. c.AbortWithStatus(http.StatusForbidden)
  841. }
  842. }
  843. func (s *Server) GenerateRoutes() http.Handler {
  844. config := cors.DefaultConfig()
  845. config.AllowWildcard = true
  846. config.AllowBrowserExtensions = true
  847. if allowedOrigins := strings.Trim(os.Getenv("OLLAMA_ORIGINS"), "\"'"); allowedOrigins != "" {
  848. config.AllowOrigins = strings.Split(allowedOrigins, ",")
  849. }
  850. for _, allowOrigin := range defaultAllowOrigins {
  851. config.AllowOrigins = append(config.AllowOrigins,
  852. fmt.Sprintf("http://%s", allowOrigin),
  853. fmt.Sprintf("https://%s", allowOrigin),
  854. fmt.Sprintf("http://%s:*", allowOrigin),
  855. fmt.Sprintf("https://%s:*", allowOrigin),
  856. )
  857. }
  858. r := gin.Default()
  859. r.Use(
  860. cors.New(config),
  861. allowedHostsMiddleware(s.addr),
  862. )
  863. r.POST("/api/pull", s.PullModelHandler)
  864. r.POST("/api/generate", s.GenerateHandler)
  865. r.POST("/api/chat", s.ChatHandler)
  866. r.POST("/api/embeddings", s.EmbeddingsHandler)
  867. r.POST("/api/create", s.CreateModelHandler)
  868. r.POST("/api/push", s.PushModelHandler)
  869. r.POST("/api/copy", s.CopyModelHandler)
  870. r.DELETE("/api/delete", s.DeleteModelHandler)
  871. r.POST("/api/show", s.ShowModelHandler)
  872. r.POST("/api/blobs/:digest", s.CreateBlobHandler)
  873. r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
  874. // Compatibility endpoints
  875. r.POST("/v1/chat/completions", openai.Middleware(), s.ChatHandler)
  876. for _, method := range []string{http.MethodGet, http.MethodHead} {
  877. r.Handle(method, "/", func(c *gin.Context) {
  878. c.String(http.StatusOK, "Ollama is running")
  879. })
  880. r.Handle(method, "/api/tags", s.ListModelsHandler)
  881. r.Handle(method, "/api/version", func(c *gin.Context) {
  882. c.JSON(http.StatusOK, gin.H{"version": version.Version})
  883. })
  884. }
  885. return r
  886. }
  887. func Serve(ln net.Listener) error {
  888. level := slog.LevelInfo
  889. if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
  890. level = slog.LevelDebug
  891. }
  892. handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
  893. Level: level,
  894. AddSource: true,
  895. ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
  896. if attr.Key == slog.SourceKey {
  897. source := attr.Value.Any().(*slog.Source)
  898. source.File = filepath.Base(source.File)
  899. }
  900. return attr
  901. },
  902. })
  903. slog.SetDefault(slog.New(handler))
  904. blobsDir, err := GetBlobsPath("")
  905. if err != nil {
  906. return err
  907. }
  908. if err := fixBlobs(blobsDir); err != nil {
  909. return err
  910. }
  911. if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
  912. // clean up unused layers and manifests
  913. if err := PruneLayers(); err != nil {
  914. return err
  915. }
  916. manifestsPath, err := GetManifestPath()
  917. if err != nil {
  918. return err
  919. }
  920. if err := PruneDirectory(manifestsPath); err != nil {
  921. return err
  922. }
  923. }
  924. ctx, done := context.WithCancel(context.Background())
  925. sched := InitScheduler(ctx)
  926. s := &Server{addr: ln.Addr(), sched: sched}
  927. r := s.GenerateRoutes()
  928. slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
  929. srvr := &http.Server{
  930. Handler: r,
  931. }
  932. // listen for a ctrl+c and stop any loaded llm
  933. signals := make(chan os.Signal, 1)
  934. signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
  935. go func() {
  936. <-signals
  937. done()
  938. sched.unloadAllRunners()
  939. gpu.Cleanup()
  940. os.Exit(0)
  941. }()
  942. if err := llm.Init(); err != nil {
  943. return fmt.Errorf("unable to initialize llm library %w", err)
  944. }
  945. s.sched.Run(ctx)
  946. // At startup we retrieve GPU information so we can get log messages before loading a model
  947. // This will log warnings to the log in case we have problems with detected GPUs
  948. _ = gpu.GetGPUInfo()
  949. return srvr.Serve(ln)
  950. }
  951. func waitForStream(c *gin.Context, ch chan interface{}) {
  952. c.Header("Content-Type", "application/json")
  953. for resp := range ch {
  954. switch r := resp.(type) {
  955. case api.ProgressResponse:
  956. if r.Status == "success" {
  957. c.JSON(http.StatusOK, r)
  958. return
  959. }
  960. case gin.H:
  961. if errorMsg, ok := r["error"].(string); ok {
  962. c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
  963. return
  964. } else {
  965. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in progress response"})
  966. return
  967. }
  968. default:
  969. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
  970. return
  971. }
  972. }
  973. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
  974. }
  975. func streamResponse(c *gin.Context, ch chan any) {
  976. c.Header("Content-Type", "application/x-ndjson")
  977. c.Stream(func(w io.Writer) bool {
  978. val, ok := <-ch
  979. if !ok {
  980. return false
  981. }
  982. bts, err := json.Marshal(val)
  983. if err != nil {
  984. slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
  985. return false
  986. }
  987. // Delineate chunks with new-line delimiter
  988. bts = append(bts, '\n')
  989. if _, err := w.Write(bts); err != nil {
  990. slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
  991. return false
  992. }
  993. return true
  994. })
  995. }
  996. // ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
  997. func chatPrompt(ctx context.Context, runner *runnerRef, template string, messages []api.Message, numCtx int) (string, error) {
  998. encode := func(s string) ([]int, error) {
  999. return runner.llama.Tokenize(ctx, s)
  1000. }
  1001. prompt, err := ChatPrompt(template, messages, numCtx, encode)
  1002. if err != nil {
  1003. return "", err
  1004. }
  1005. return prompt, nil
  1006. }
  1007. func (s *Server) ChatHandler(c *gin.Context) {
  1008. checkpointStart := time.Now()
  1009. var req api.ChatRequest
  1010. err := c.ShouldBindJSON(&req)
  1011. switch {
  1012. case errors.Is(err, io.EOF):
  1013. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  1014. return
  1015. case err != nil:
  1016. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  1017. return
  1018. }
  1019. // validate the request
  1020. switch {
  1021. case req.Model == "":
  1022. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  1023. return
  1024. case len(req.Format) > 0 && req.Format != "json":
  1025. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
  1026. return
  1027. }
  1028. model, err := GetModel(req.Model)
  1029. if err != nil {
  1030. var pErr *fs.PathError
  1031. if errors.As(err, &pErr) {
  1032. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  1033. return
  1034. }
  1035. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  1036. return
  1037. }
  1038. if model.IsEmbedding() {
  1039. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support chat"})
  1040. return
  1041. }
  1042. opts, err := modelOptions(model, req.Options)
  1043. if err != nil {
  1044. if errors.Is(err, api.ErrInvalidOpts) {
  1045. c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  1046. return
  1047. }
  1048. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  1049. return
  1050. }
  1051. var sessionDuration time.Duration
  1052. if req.KeepAlive == nil {
  1053. sessionDuration = getDefaultSessionDuration()
  1054. } else {
  1055. sessionDuration = req.KeepAlive.Duration
  1056. }
  1057. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  1058. var runner *runnerRef
  1059. select {
  1060. case runner = <-rCh:
  1061. case err = <-eCh:
  1062. if errors.Is(err, context.Canceled) {
  1063. c.JSON(499, gin.H{"error": "request canceled"})
  1064. return
  1065. }
  1066. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  1067. return
  1068. }
  1069. checkpointLoaded := time.Now()
  1070. // if the first message is not a system message, then add the model's default system message
  1071. if len(req.Messages) > 0 && req.Messages[0].Role != "system" {
  1072. req.Messages = append([]api.Message{
  1073. {
  1074. Role: "system",
  1075. Content: model.System,
  1076. },
  1077. }, req.Messages...)
  1078. }
  1079. prompt, err := chatPrompt(c.Request.Context(), runner, model.Template, req.Messages, opts.NumCtx)
  1080. if err != nil {
  1081. c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  1082. return
  1083. }
  1084. // an empty request loads the model
  1085. if len(req.Messages) == 0 || prompt == "" {
  1086. resp := api.ChatResponse{
  1087. CreatedAt: time.Now().UTC(),
  1088. Model: req.Model,
  1089. Done: true,
  1090. Message: api.Message{Role: "assistant"},
  1091. }
  1092. c.JSON(http.StatusOK, resp)
  1093. return
  1094. }
  1095. // only send images that are in the prompt
  1096. var i int
  1097. var images []llm.ImageData
  1098. for _, m := range req.Messages {
  1099. for _, img := range m.Images {
  1100. if !isSupportedImageType(img) {
  1101. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
  1102. return
  1103. }
  1104. if strings.Contains(prompt, fmt.Sprintf("[img-%d]", i)) {
  1105. images = append(images, llm.ImageData{Data: img, ID: i})
  1106. }
  1107. i += 1
  1108. }
  1109. }
  1110. slog.Debug("chat handler", "prompt", prompt, "images", len(images))
  1111. ch := make(chan any)
  1112. go func() {
  1113. defer close(ch)
  1114. fn := func(r llm.CompletionResponse) {
  1115. resp := api.ChatResponse{
  1116. Model: req.Model,
  1117. CreatedAt: time.Now().UTC(),
  1118. Message: api.Message{Role: "assistant", Content: r.Content},
  1119. Done: r.Done,
  1120. Metrics: api.Metrics{
  1121. PromptEvalCount: r.PromptEvalCount,
  1122. PromptEvalDuration: r.PromptEvalDuration,
  1123. EvalCount: r.EvalCount,
  1124. EvalDuration: r.EvalDuration,
  1125. },
  1126. }
  1127. if r.Done {
  1128. resp.TotalDuration = time.Since(checkpointStart)
  1129. resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
  1130. }
  1131. ch <- resp
  1132. }
  1133. if err := runner.llama.Completion(c.Request.Context(), llm.CompletionRequest{
  1134. Prompt: prompt,
  1135. Format: req.Format,
  1136. Images: images,
  1137. Options: opts,
  1138. }, fn); err != nil {
  1139. ch <- gin.H{"error": err.Error()}
  1140. }
  1141. }()
  1142. if req.Stream != nil && !*req.Stream {
  1143. // Accumulate responses into the final response
  1144. var final api.ChatResponse
  1145. var sb strings.Builder
  1146. for resp := range ch {
  1147. switch r := resp.(type) {
  1148. case api.ChatResponse:
  1149. sb.WriteString(r.Message.Content)
  1150. final = r
  1151. case gin.H:
  1152. if errorMsg, ok := r["error"].(string); ok {
  1153. c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
  1154. return
  1155. } else {
  1156. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
  1157. return
  1158. }
  1159. default:
  1160. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
  1161. return
  1162. }
  1163. }
  1164. final.Message = api.Message{Role: "assistant", Content: sb.String()}
  1165. c.JSON(http.StatusOK, final)
  1166. return
  1167. }
  1168. streamResponse(c, ch)
  1169. }