routes.go 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421
  1. package server
  2. import (
  3. "cmp"
  4. "context"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "io/fs"
  10. "log/slog"
  11. "math"
  12. "net"
  13. "net/http"
  14. "net/netip"
  15. "os"
  16. "os/signal"
  17. "path/filepath"
  18. "strconv"
  19. "strings"
  20. "syscall"
  21. "time"
  22. "github.com/gin-contrib/cors"
  23. "github.com/gin-gonic/gin"
  24. "golang.org/x/exp/slices"
  25. "github.com/ollama/ollama/api"
  26. "github.com/ollama/ollama/gpu"
  27. "github.com/ollama/ollama/llm"
  28. "github.com/ollama/ollama/openai"
  29. "github.com/ollama/ollama/server/envconfig"
  30. "github.com/ollama/ollama/types/model"
  31. "github.com/ollama/ollama/version"
  32. )
  33. var mode string = gin.DebugMode
  34. type Server struct {
  35. addr net.Addr
  36. sched *Scheduler
  37. }
  38. func init() {
  39. switch mode {
  40. case gin.DebugMode:
  41. case gin.ReleaseMode:
  42. case gin.TestMode:
  43. default:
  44. mode = gin.DebugMode
  45. }
  46. gin.SetMode(mode)
  47. }
  // defaultSessionDuration is the keep-alive returned by
  // getDefaultSessionDuration when OLLAMA_KEEP_ALIVE is unset or unparsable.
  var defaultSessionDuration = 5 * time.Minute
  49. func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
  50. opts := api.DefaultOptions()
  51. if err := opts.FromMap(model.Options); err != nil {
  52. return api.Options{}, err
  53. }
  54. if err := opts.FromMap(requestOpts); err != nil {
  55. return api.Options{}, err
  56. }
  57. return opts, nil
  58. }
  // isSupportedImageType reports whether the content type sniffed from image
  // by http.DetectContentType is one of the accepted image types.
  func isSupportedImageType(image []byte) bool {
  	switch http.DetectContentType(image) {
  	case "image/jpeg", "image/jpg", "image/png":
  		return true
  	default:
  		return false
  	}
  }
  64. func (s *Server) GenerateHandler(c *gin.Context) {
  65. checkpointStart := time.Now()
  66. var req api.GenerateRequest
  67. err := c.ShouldBindJSON(&req)
  68. switch {
  69. case errors.Is(err, io.EOF):
  70. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  71. return
  72. case err != nil:
  73. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  74. return
  75. }
  76. // validate the request
  77. switch {
  78. case req.Model == "":
  79. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  80. return
  81. case len(req.Format) > 0 && req.Format != "json":
  82. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
  83. return
  84. case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
  85. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
  86. return
  87. }
  88. for _, img := range req.Images {
  89. if !isSupportedImageType(img) {
  90. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
  91. return
  92. }
  93. }
  94. model, err := GetModel(req.Model)
  95. if err != nil {
  96. var pErr *fs.PathError
  97. if errors.As(err, &pErr) {
  98. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  99. return
  100. }
  101. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  102. return
  103. }
  104. if model.IsEmbedding() {
  105. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support generate"})
  106. return
  107. }
  108. opts, err := modelOptions(model, req.Options)
  109. if err != nil {
  110. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  111. return
  112. }
  113. var sessionDuration time.Duration
  114. if req.KeepAlive == nil {
  115. sessionDuration = getDefaultSessionDuration()
  116. } else {
  117. sessionDuration = req.KeepAlive.Duration
  118. }
  119. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  120. var runner *runnerRef
  121. select {
  122. case runner = <-rCh:
  123. case err = <-eCh:
  124. handleErrorResponse(c, err)
  125. return
  126. }
  127. // an empty request loads the model
  128. // note: for a short while template was used in lieu
  129. // of `raw` mode so we need to check for it too
  130. if req.Prompt == "" && req.Template == "" && req.System == "" {
  131. c.JSON(http.StatusOK, api.GenerateResponse{
  132. CreatedAt: time.Now().UTC(),
  133. Model: req.Model,
  134. Done: true,
  135. DoneReason: "load",
  136. })
  137. return
  138. }
  139. checkpointLoaded := time.Now()
  140. var prompt string
  141. switch {
  142. case req.Raw:
  143. prompt = req.Prompt
  144. case req.Prompt != "":
  145. if req.Template == "" {
  146. req.Template = model.Template
  147. }
  148. if req.System == "" {
  149. req.System = model.System
  150. }
  151. slog.Debug("generate handler", "prompt", req.Prompt)
  152. slog.Debug("generate handler", "template", req.Template)
  153. slog.Debug("generate handler", "system", req.System)
  154. var sb strings.Builder
  155. for i := range req.Images {
  156. fmt.Fprintf(&sb, "[img-%d] ", i)
  157. }
  158. sb.WriteString(req.Prompt)
  159. p, err := Prompt(req.Template, req.System, sb.String(), "", true)
  160. if err != nil {
  161. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  162. return
  163. }
  164. sb.Reset()
  165. if req.Context != nil {
  166. prev, err := runner.llama.Detokenize(c.Request.Context(), req.Context)
  167. if err != nil {
  168. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  169. return
  170. }
  171. sb.WriteString(prev)
  172. }
  173. sb.WriteString(p)
  174. prompt = sb.String()
  175. }
  176. slog.Debug("generate handler", "prompt", prompt)
  177. ch := make(chan any)
  178. var generated strings.Builder
  179. go func() {
  180. defer close(ch)
  181. fn := func(r llm.CompletionResponse) {
  182. // Build up the full response
  183. if _, err := generated.WriteString(r.Content); err != nil {
  184. ch <- gin.H{"error": err.Error()}
  185. return
  186. }
  187. resp := api.GenerateResponse{
  188. Model: req.Model,
  189. CreatedAt: time.Now().UTC(),
  190. Done: r.Done,
  191. Response: r.Content,
  192. DoneReason: r.DoneReason,
  193. Metrics: api.Metrics{
  194. PromptEvalCount: r.PromptEvalCount,
  195. PromptEvalDuration: r.PromptEvalDuration,
  196. EvalCount: r.EvalCount,
  197. EvalDuration: r.EvalDuration,
  198. },
  199. }
  200. if r.Done {
  201. resp.TotalDuration = time.Since(checkpointStart)
  202. resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
  203. if !req.Raw {
  204. p, err := Prompt(req.Template, req.System, req.Prompt, generated.String(), false)
  205. if err != nil {
  206. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  207. return
  208. }
  209. // TODO (jmorganca): encode() should not strip special tokens
  210. tokens, err := runner.llama.Tokenize(c.Request.Context(), p)
  211. if err != nil {
  212. ch <- gin.H{"error": err.Error()}
  213. return
  214. }
  215. resp.Context = append(req.Context, tokens...)
  216. }
  217. }
  218. ch <- resp
  219. }
  220. var images []llm.ImageData
  221. for i := range req.Images {
  222. images = append(images, llm.ImageData{
  223. ID: i,
  224. Data: req.Images[i],
  225. })
  226. }
  227. // Start prediction
  228. req := llm.CompletionRequest{
  229. Prompt: prompt,
  230. Format: req.Format,
  231. Images: images,
  232. Options: opts,
  233. }
  234. if err := runner.llama.Completion(c.Request.Context(), req, fn); err != nil {
  235. ch <- gin.H{"error": err.Error()}
  236. }
  237. }()
  238. if req.Stream != nil && !*req.Stream {
  239. // Accumulate responses into the final response
  240. var final api.GenerateResponse
  241. var sb strings.Builder
  242. for resp := range ch {
  243. switch r := resp.(type) {
  244. case api.GenerateResponse:
  245. sb.WriteString(r.Response)
  246. final = r
  247. case gin.H:
  248. if errorMsg, ok := r["error"].(string); ok {
  249. c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
  250. return
  251. } else {
  252. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
  253. return
  254. }
  255. default:
  256. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
  257. return
  258. }
  259. }
  260. final.Response = sb.String()
  261. c.JSON(http.StatusOK, final)
  262. return
  263. }
  264. streamResponse(c, ch)
  265. }
  266. func getDefaultSessionDuration() time.Duration {
  267. if t, exists := os.LookupEnv("OLLAMA_KEEP_ALIVE"); exists {
  268. v, err := strconv.Atoi(t)
  269. if err != nil {
  270. d, err := time.ParseDuration(t)
  271. if err != nil {
  272. return defaultSessionDuration
  273. }
  274. if d < 0 {
  275. return time.Duration(math.MaxInt64)
  276. }
  277. return d
  278. }
  279. d := time.Duration(v) * time.Second
  280. if d < 0 {
  281. return time.Duration(math.MaxInt64)
  282. }
  283. return d
  284. }
  285. return defaultSessionDuration
  286. }
  287. func (s *Server) EmbeddingsHandler(c *gin.Context) {
  288. var req api.EmbeddingRequest
  289. err := c.ShouldBindJSON(&req)
  290. switch {
  291. case errors.Is(err, io.EOF):
  292. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  293. return
  294. case err != nil:
  295. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  296. return
  297. }
  298. if req.Model == "" {
  299. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  300. return
  301. }
  302. model, err := GetModel(req.Model)
  303. if err != nil {
  304. var pErr *fs.PathError
  305. if errors.As(err, &pErr) {
  306. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  307. return
  308. }
  309. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  310. return
  311. }
  312. opts, err := modelOptions(model, req.Options)
  313. if err != nil {
  314. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  315. return
  316. }
  317. var sessionDuration time.Duration
  318. if req.KeepAlive == nil {
  319. sessionDuration = getDefaultSessionDuration()
  320. } else {
  321. sessionDuration = req.KeepAlive.Duration
  322. }
  323. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  324. var runner *runnerRef
  325. select {
  326. case runner = <-rCh:
  327. case err = <-eCh:
  328. handleErrorResponse(c, err)
  329. return
  330. }
  331. // an empty request loads the model
  332. if req.Prompt == "" {
  333. c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
  334. return
  335. }
  336. embedding, err := runner.llama.Embedding(c.Request.Context(), req.Prompt)
  337. if err != nil {
  338. slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
  339. c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
  340. return
  341. }
  342. resp := api.EmbeddingResponse{
  343. Embedding: embedding,
  344. }
  345. c.JSON(http.StatusOK, resp)
  346. }
  347. func (s *Server) TokenizeHandler(c *gin.Context) {
  348. var req api.TokenizeRequest
  349. err := c.ShouldBindJSON(&req)
  350. switch {
  351. case errors.Is(err, io.EOF):
  352. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  353. return
  354. case err != nil:
  355. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  356. return
  357. }
  358. if req.Model == "" {
  359. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  360. return
  361. }
  362. model, err := GetModel(req.Model)
  363. if err != nil {
  364. var pErr *fs.PathError
  365. if errors.As(err, &pErr) {
  366. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  367. return
  368. }
  369. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  370. return
  371. }
  372. opts, err := modelOptions(model, req.Options)
  373. if err != nil {
  374. // TODO: handle specific errors
  375. // if errors.Is(err, api.ErrInvalidOpts) {
  376. // c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  377. // return
  378. // }
  379. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  380. return
  381. }
  382. var sessionDuration time.Duration
  383. if req.KeepAlive == nil {
  384. sessionDuration = getDefaultSessionDuration()
  385. } else {
  386. sessionDuration = req.KeepAlive.Duration
  387. }
  388. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  389. var runner *runnerRef
  390. select {
  391. case runner = <-rCh:
  392. case err = <-eCh:
  393. if errors.Is(err, context.Canceled) {
  394. c.JSON(499, gin.H{"error": "request canceled"})
  395. return
  396. }
  397. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  398. return
  399. }
  400. // an empty request loads the model
  401. if req.Prompt == "" {
  402. c.JSON(http.StatusOK, api.TokenizeResponse{Tokens: []int{}})
  403. return
  404. }
  405. tokens, err := runner.llama.Tokenize(c.Request.Context(), req.Prompt)
  406. if err != nil {
  407. slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
  408. c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
  409. return
  410. }
  411. resp := api.TokenizeResponse{
  412. Tokens: tokens,
  413. }
  414. c.JSON(http.StatusOK, resp)
  415. }
  416. func (s *Server) PullModelHandler(c *gin.Context) {
  417. var req api.PullRequest
  418. err := c.ShouldBindJSON(&req)
  419. switch {
  420. case errors.Is(err, io.EOF):
  421. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  422. return
  423. case err != nil:
  424. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  425. return
  426. }
  427. var model string
  428. if req.Model != "" {
  429. model = req.Model
  430. } else if req.Name != "" {
  431. model = req.Name
  432. } else {
  433. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  434. return
  435. }
  436. ch := make(chan any)
  437. go func() {
  438. defer close(ch)
  439. fn := func(r api.ProgressResponse) {
  440. ch <- r
  441. }
  442. regOpts := &registryOptions{
  443. Insecure: req.Insecure,
  444. }
  445. ctx, cancel := context.WithCancel(c.Request.Context())
  446. defer cancel()
  447. if err := PullModel(ctx, model, regOpts, fn); err != nil {
  448. ch <- gin.H{"error": err.Error()}
  449. }
  450. }()
  451. if req.Stream != nil && !*req.Stream {
  452. waitForStream(c, ch)
  453. return
  454. }
  455. streamResponse(c, ch)
  456. }
  457. func (s *Server) PushModelHandler(c *gin.Context) {
  458. var req api.PushRequest
  459. err := c.ShouldBindJSON(&req)
  460. switch {
  461. case errors.Is(err, io.EOF):
  462. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  463. return
  464. case err != nil:
  465. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  466. return
  467. }
  468. var model string
  469. if req.Model != "" {
  470. model = req.Model
  471. } else if req.Name != "" {
  472. model = req.Name
  473. } else {
  474. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  475. return
  476. }
  477. ch := make(chan any)
  478. go func() {
  479. defer close(ch)
  480. fn := func(r api.ProgressResponse) {
  481. ch <- r
  482. }
  483. regOpts := &registryOptions{
  484. Insecure: req.Insecure,
  485. }
  486. ctx, cancel := context.WithCancel(c.Request.Context())
  487. defer cancel()
  488. if err := PushModel(ctx, model, regOpts, fn); err != nil {
  489. ch <- gin.H{"error": err.Error()}
  490. }
  491. }()
  492. if req.Stream != nil && !*req.Stream {
  493. waitForStream(c, ch)
  494. return
  495. }
  496. streamResponse(c, ch)
  497. }
  498. func (s *Server) CreateModelHandler(c *gin.Context) {
  499. var req api.CreateRequest
  500. if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
  501. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  502. return
  503. } else if err != nil {
  504. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  505. return
  506. }
  507. name := model.ParseName(cmp.Or(req.Model, req.Name))
  508. if !name.IsValid() {
  509. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
  510. return
  511. }
  512. if req.Path == "" && req.Modelfile == "" {
  513. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
  514. return
  515. }
  516. var r io.Reader = strings.NewReader(req.Modelfile)
  517. if req.Path != "" && req.Modelfile == "" {
  518. f, err := os.Open(req.Path)
  519. if err != nil {
  520. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
  521. return
  522. }
  523. defer f.Close()
  524. r = f
  525. }
  526. modelfile, err := model.ParseFile(r)
  527. if err != nil {
  528. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  529. return
  530. }
  531. ch := make(chan any)
  532. go func() {
  533. defer close(ch)
  534. fn := func(resp api.ProgressResponse) {
  535. ch <- resp
  536. }
  537. ctx, cancel := context.WithCancel(c.Request.Context())
  538. defer cancel()
  539. if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
  540. ch <- gin.H{"error": err.Error()}
  541. }
  542. }()
  543. if req.Stream != nil && !*req.Stream {
  544. waitForStream(c, ch)
  545. return
  546. }
  547. streamResponse(c, ch)
  548. }
  549. func (s *Server) DeleteModelHandler(c *gin.Context) {
  550. var req api.DeleteRequest
  551. err := c.ShouldBindJSON(&req)
  552. switch {
  553. case errors.Is(err, io.EOF):
  554. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  555. return
  556. case err != nil:
  557. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  558. return
  559. }
  560. var model string
  561. if req.Model != "" {
  562. model = req.Model
  563. } else if req.Name != "" {
  564. model = req.Name
  565. } else {
  566. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  567. return
  568. }
  569. if err := DeleteModel(model); err != nil {
  570. if os.IsNotExist(err) {
  571. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", model)})
  572. } else {
  573. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  574. }
  575. return
  576. }
  577. manifestsPath, err := GetManifestPath()
  578. if err != nil {
  579. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  580. return
  581. }
  582. if err := PruneDirectory(manifestsPath); err != nil {
  583. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  584. return
  585. }
  586. c.JSON(http.StatusOK, nil)
  587. }
  588. func (s *Server) ShowModelHandler(c *gin.Context) {
  589. var req api.ShowRequest
  590. err := c.ShouldBindJSON(&req)
  591. switch {
  592. case errors.Is(err, io.EOF):
  593. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  594. return
  595. case err != nil:
  596. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  597. return
  598. }
  599. if req.Model != "" {
  600. // noop
  601. } else if req.Name != "" {
  602. req.Model = req.Name
  603. } else {
  604. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  605. return
  606. }
  607. resp, err := GetModelInfo(req)
  608. if err != nil {
  609. if os.IsNotExist(err) {
  610. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
  611. } else {
  612. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  613. }
  614. return
  615. }
  616. c.JSON(http.StatusOK, resp)
  617. }
  618. func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
  619. model, err := GetModel(req.Model)
  620. if err != nil {
  621. return nil, err
  622. }
  623. modelDetails := api.ModelDetails{
  624. ParentModel: model.ParentModel,
  625. Format: model.Config.ModelFormat,
  626. Family: model.Config.ModelFamily,
  627. Families: model.Config.ModelFamilies,
  628. ParameterSize: model.Config.ModelType,
  629. QuantizationLevel: model.Config.FileType,
  630. }
  631. if req.System != "" {
  632. model.System = req.System
  633. }
  634. if req.Template != "" {
  635. model.Template = req.Template
  636. }
  637. msgs := make([]api.Message, 0)
  638. for _, msg := range model.Messages {
  639. msgs = append(msgs, api.Message{Role: msg.Role, Content: msg.Content})
  640. }
  641. resp := &api.ShowResponse{
  642. License: strings.Join(model.License, "\n"),
  643. System: model.System,
  644. Template: model.Template,
  645. Details: modelDetails,
  646. Messages: msgs,
  647. }
  648. var params []string
  649. cs := 30
  650. for k, v := range model.Options {
  651. switch val := v.(type) {
  652. case []interface{}:
  653. for _, nv := range val {
  654. params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
  655. }
  656. default:
  657. params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
  658. }
  659. }
  660. resp.Parameters = strings.Join(params, "\n")
  661. for k, v := range req.Options {
  662. if _, ok := req.Options[k]; ok {
  663. model.Options[k] = v
  664. }
  665. }
  666. var sb strings.Builder
  667. fmt.Fprintln(&sb, "# Modelfile generate by \"ollama show\"")
  668. fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
  669. fmt.Fprintf(&sb, "# FROM %s\n\n", model.ShortName)
  670. fmt.Fprint(&sb, model.String())
  671. resp.Modelfile = sb.String()
  672. return resp, nil
  673. }
  674. func (s *Server) ListModelsHandler(c *gin.Context) {
  675. manifests, err := GetManifestPath()
  676. if err != nil {
  677. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  678. return
  679. }
  680. var models []api.ModelResponse
  681. if err := filepath.Walk(manifests, func(path string, info os.FileInfo, _ error) error {
  682. if !info.IsDir() {
  683. rel, err := filepath.Rel(manifests, path)
  684. if err != nil {
  685. return err
  686. }
  687. if hidden, err := filepath.Match(".*", filepath.Base(rel)); err != nil {
  688. return err
  689. } else if hidden {
  690. return nil
  691. }
  692. n := model.ParseNameFromFilepath(rel)
  693. if !n.IsValid() {
  694. slog.Warn("bad manifest filepath", "path", rel)
  695. return nil
  696. }
  697. m, err := ParseNamedManifest(n)
  698. if err != nil {
  699. slog.Warn("bad manifest", "name", n, "error", err)
  700. return nil
  701. }
  702. f, err := m.Config.Open()
  703. if err != nil {
  704. slog.Warn("bad manifest config filepath", "name", n, "error", err)
  705. return nil
  706. }
  707. defer f.Close()
  708. var c ConfigV2
  709. if err := json.NewDecoder(f).Decode(&c); err != nil {
  710. slog.Warn("bad manifest config", "name", n, "error", err)
  711. return nil
  712. }
  713. // tag should never be masked
  714. models = append(models, api.ModelResponse{
  715. Model: n.DisplayShortest(),
  716. Name: n.DisplayShortest(),
  717. Size: m.Size(),
  718. Digest: m.Digest,
  719. ModifiedAt: info.ModTime(),
  720. Details: api.ModelDetails{
  721. Format: c.ModelFormat,
  722. Family: c.ModelFamily,
  723. Families: c.ModelFamilies,
  724. ParameterSize: c.ModelType,
  725. QuantizationLevel: c.FileType,
  726. },
  727. })
  728. }
  729. return nil
  730. }); err != nil {
  731. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  732. return
  733. }
  734. slices.SortStableFunc(models, func(i, j api.ModelResponse) int {
  735. // most recently modified first
  736. return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
  737. })
  738. c.JSON(http.StatusOK, api.ListResponse{Models: models})
  739. }
  740. func (s *Server) CopyModelHandler(c *gin.Context) {
  741. var r api.CopyRequest
  742. if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
  743. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  744. return
  745. } else if err != nil {
  746. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  747. return
  748. }
  749. src := model.ParseName(r.Source)
  750. if !src.IsValid() {
  751. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
  752. return
  753. }
  754. dst := model.ParseName(r.Destination)
  755. if !dst.IsValid() {
  756. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
  757. return
  758. }
  759. if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
  760. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
  761. } else if err != nil {
  762. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  763. }
  764. }
  765. func (s *Server) HeadBlobHandler(c *gin.Context) {
  766. path, err := GetBlobsPath(c.Param("digest"))
  767. if err != nil {
  768. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  769. return
  770. }
  771. if _, err := os.Stat(path); err != nil {
  772. c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
  773. return
  774. }
  775. c.Status(http.StatusOK)
  776. }
  777. func (s *Server) CreateBlobHandler(c *gin.Context) {
  778. path, err := GetBlobsPath(c.Param("digest"))
  779. if err != nil {
  780. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  781. return
  782. }
  783. _, err = os.Stat(path)
  784. switch {
  785. case errors.Is(err, os.ErrNotExist):
  786. // noop
  787. case err != nil:
  788. c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  789. return
  790. default:
  791. c.Status(http.StatusOK)
  792. return
  793. }
  794. layer, err := NewLayer(c.Request.Body, "")
  795. if err != nil {
  796. c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  797. return
  798. }
  799. if layer.Digest != c.Param("digest") {
  800. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
  801. return
  802. }
  803. c.Status(http.StatusCreated)
  804. }
  // isLocalIP reports whether ip equals, by string form, the address part of
  // any address assigned to one of this machine's network interfaces.
  // Interfaces and addresses that cannot be read or parsed are skipped; if the
  // interface list itself cannot be read the result is false.
  func isLocalIP(ip netip.Addr) bool {
  	ifaces, err := net.Interfaces()
  	if err != nil {
  		return false
  	}

  	want := ip.String()
  	for _, iface := range ifaces {
  		addrs, err := iface.Addrs()
  		if err != nil {
  			continue
  		}

  		for _, a := range addrs {
  			local, _, err := net.ParseCIDR(a.String())
  			if err == nil && local.String() == want {
  				return true
  			}
  		}
  	}

  	return false
  }
  // allowedHost reports whether host names this machine or a local domain.
  // That is the case for the empty string, "localhost", the OS hostname, and
  // any name ending in ".localhost", ".local" or ".internal".
  func allowedHost(host string) bool {
  	switch host {
  	case "", "localhost":
  		return true
  	}

  	if name, err := os.Hostname(); err == nil && name == host {
  		return true
  	}

  	// check if the host is a local TLD
  	for _, tld := range [...]string{"localhost", "local", "internal"} {
  		if strings.HasSuffix(host, "."+tld) {
  			return true
  		}
  	}

  	return false
  }
  843. func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
  844. return func(c *gin.Context) {
  845. if addr == nil {
  846. c.Next()
  847. return
  848. }
  849. if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
  850. c.Next()
  851. return
  852. }
  853. host, _, err := net.SplitHostPort(c.Request.Host)
  854. if err != nil {
  855. host = c.Request.Host
  856. }
  857. if addr, err := netip.ParseAddr(host); err == nil {
  858. if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
  859. c.Next()
  860. return
  861. }
  862. }
  863. if allowedHost(host) {
  864. if c.Request.Method == "OPTIONS" {
  865. c.AbortWithStatus(http.StatusNoContent)
  866. return
  867. }
  868. c.Next()
  869. return
  870. }
  871. c.AbortWithStatus(http.StatusForbidden)
  872. }
  873. }
  874. func (s *Server) GenerateRoutes() http.Handler {
  875. config := cors.DefaultConfig()
  876. config.AllowWildcard = true
  877. config.AllowBrowserExtensions = true
  878. config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
  879. config.AllowOrigins = envconfig.AllowOrigins
  880. r := gin.Default()
  881. r.Use(
  882. cors.New(config),
  883. allowedHostsMiddleware(s.addr),
  884. )
  885. r.POST("/api/pull", s.PullModelHandler)
  886. r.POST("/api/generate", s.GenerateHandler)
  887. r.POST("/api/chat", s.ChatHandler)
  888. r.POST("/api/embeddings", s.EmbeddingsHandler)
  889. r.POST("/api/tokenize", s.TokenizeHandler)
  890. r.POST("/api/create", s.CreateModelHandler)
  891. r.POST("/api/push", s.PushModelHandler)
  892. r.POST("/api/copy", s.CopyModelHandler)
  893. r.DELETE("/api/delete", s.DeleteModelHandler)
  894. r.POST("/api/show", s.ShowModelHandler)
  895. r.POST("/api/blobs/:digest", s.CreateBlobHandler)
  896. r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
  897. // Compatibility endpoints
  898. r.POST("/v1/chat/completions", openai.Middleware(), s.ChatHandler)
  899. for _, method := range []string{http.MethodGet, http.MethodHead} {
  900. r.Handle(method, "/", func(c *gin.Context) {
  901. c.String(http.StatusOK, "Ollama is running")
  902. })
  903. r.Handle(method, "/api/tags", s.ListModelsHandler)
  904. r.Handle(method, "/api/version", func(c *gin.Context) {
  905. c.JSON(http.StatusOK, gin.H{"version": version.Version})
  906. })
  907. }
  908. return r
  909. }
  910. func Serve(ln net.Listener) error {
  911. level := slog.LevelInfo
  912. if envconfig.Debug {
  913. level = slog.LevelDebug
  914. }
  915. slog.Info("server config", "env", envconfig.AsMap())
  916. handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
  917. Level: level,
  918. AddSource: true,
  919. ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
  920. if attr.Key == slog.SourceKey {
  921. source := attr.Value.Any().(*slog.Source)
  922. source.File = filepath.Base(source.File)
  923. }
  924. return attr
  925. },
  926. })
  927. slog.SetDefault(slog.New(handler))
  928. blobsDir, err := GetBlobsPath("")
  929. if err != nil {
  930. return err
  931. }
  932. if err := fixBlobs(blobsDir); err != nil {
  933. return err
  934. }
  935. if !envconfig.NoPrune {
  936. // clean up unused layers and manifests
  937. if err := PruneLayers(); err != nil {
  938. return err
  939. }
  940. manifestsPath, err := GetManifestPath()
  941. if err != nil {
  942. return err
  943. }
  944. if err := PruneDirectory(manifestsPath); err != nil {
  945. return err
  946. }
  947. }
  948. ctx, done := context.WithCancel(context.Background())
  949. schedCtx, schedDone := context.WithCancel(ctx)
  950. sched := InitScheduler(schedCtx)
  951. s := &Server{addr: ln.Addr(), sched: sched}
  952. r := s.GenerateRoutes()
  953. slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
  954. srvr := &http.Server{
  955. Handler: r,
  956. }
  957. // listen for a ctrl+c and stop any loaded llm
  958. signals := make(chan os.Signal, 1)
  959. signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
  960. go func() {
  961. <-signals
  962. srvr.Close()
  963. schedDone()
  964. sched.unloadAllRunners()
  965. gpu.Cleanup()
  966. done()
  967. }()
  968. if err := llm.Init(); err != nil {
  969. return fmt.Errorf("unable to initialize llm library %w", err)
  970. }
  971. s.sched.Run(schedCtx)
  972. // At startup we retrieve GPU information so we can get log messages before loading a model
  973. // This will log warnings to the log in case we have problems with detected GPUs
  974. gpus := gpu.GetGPUInfo()
  975. gpus.LogDetails()
  976. err = srvr.Serve(ln)
  977. // If server is closed from the signal handler, wait for the ctx to be done
  978. // otherwise error out quickly
  979. if !errors.Is(err, http.ErrServerClosed) {
  980. return err
  981. }
  982. <-ctx.Done()
  983. return err
  984. }
  985. func waitForStream(c *gin.Context, ch chan interface{}) {
  986. c.Header("Content-Type", "application/json")
  987. for resp := range ch {
  988. switch r := resp.(type) {
  989. case api.ProgressResponse:
  990. if r.Status == "success" {
  991. c.JSON(http.StatusOK, r)
  992. return
  993. }
  994. case gin.H:
  995. if errorMsg, ok := r["error"].(string); ok {
  996. c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
  997. return
  998. } else {
  999. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in progress response"})
  1000. return
  1001. }
  1002. default:
  1003. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
  1004. return
  1005. }
  1006. }
  1007. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
  1008. }
  1009. func streamResponse(c *gin.Context, ch chan any) {
  1010. c.Header("Content-Type", "application/x-ndjson")
  1011. c.Stream(func(w io.Writer) bool {
  1012. val, ok := <-ch
  1013. if !ok {
  1014. return false
  1015. }
  1016. bts, err := json.Marshal(val)
  1017. if err != nil {
  1018. slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
  1019. return false
  1020. }
  1021. // Delineate chunks with new-line delimiter
  1022. bts = append(bts, '\n')
  1023. if _, err := w.Write(bts); err != nil {
  1024. slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
  1025. return false
  1026. }
  1027. return true
  1028. })
  1029. }
  1030. // ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
  1031. func chatPrompt(ctx context.Context, runner *runnerRef, template string, messages []api.Message, numCtx int) (string, error) {
  1032. encode := func(s string) ([]int, error) {
  1033. return runner.llama.Tokenize(ctx, s)
  1034. }
  1035. prompt, err := ChatPrompt(template, messages, numCtx, encode)
  1036. if err != nil {
  1037. return "", err
  1038. }
  1039. return prompt, nil
  1040. }
  1041. func (s *Server) ChatHandler(c *gin.Context) {
  1042. checkpointStart := time.Now()
  1043. var req api.ChatRequest
  1044. err := c.ShouldBindJSON(&req)
  1045. switch {
  1046. case errors.Is(err, io.EOF):
  1047. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  1048. return
  1049. case err != nil:
  1050. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  1051. return
  1052. }
  1053. // validate the request
  1054. switch {
  1055. case req.Model == "":
  1056. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
  1057. return
  1058. case len(req.Format) > 0 && req.Format != "json":
  1059. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
  1060. return
  1061. }
  1062. model, err := GetModel(req.Model)
  1063. if err != nil {
  1064. var pErr *fs.PathError
  1065. if errors.As(err, &pErr) {
  1066. c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found, try pulling it first", req.Model)})
  1067. return
  1068. }
  1069. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  1070. return
  1071. }
  1072. if model.IsEmbedding() {
  1073. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "embedding models do not support chat"})
  1074. return
  1075. }
  1076. opts, err := modelOptions(model, req.Options)
  1077. if err != nil {
  1078. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  1079. return
  1080. }
  1081. var sessionDuration time.Duration
  1082. if req.KeepAlive == nil {
  1083. sessionDuration = getDefaultSessionDuration()
  1084. } else {
  1085. sessionDuration = req.KeepAlive.Duration
  1086. }
  1087. rCh, eCh := s.sched.GetRunner(c.Request.Context(), model, opts, sessionDuration)
  1088. var runner *runnerRef
  1089. select {
  1090. case runner = <-rCh:
  1091. case err = <-eCh:
  1092. handleErrorResponse(c, err)
  1093. return
  1094. }
  1095. checkpointLoaded := time.Now()
  1096. // if the first message is not a system message, then add the model's default system message
  1097. if len(req.Messages) > 0 && req.Messages[0].Role != "system" {
  1098. req.Messages = append([]api.Message{
  1099. {
  1100. Role: "system",
  1101. Content: model.System,
  1102. },
  1103. }, req.Messages...)
  1104. }
  1105. prompt, err := chatPrompt(c.Request.Context(), runner, model.Template, req.Messages, opts.NumCtx)
  1106. if err != nil {
  1107. c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  1108. return
  1109. }
  1110. // an empty request loads the model
  1111. if len(req.Messages) == 0 || prompt == "" {
  1112. resp := api.ChatResponse{
  1113. CreatedAt: time.Now().UTC(),
  1114. Model: req.Model,
  1115. Done: true,
  1116. DoneReason: "load",
  1117. Message: api.Message{Role: "assistant"},
  1118. }
  1119. c.JSON(http.StatusOK, resp)
  1120. return
  1121. }
  1122. // only send images that are in the prompt
  1123. var i int
  1124. var images []llm.ImageData
  1125. for _, m := range req.Messages {
  1126. for _, img := range m.Images {
  1127. if !isSupportedImageType(img) {
  1128. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "unsupported image format"})
  1129. return
  1130. }
  1131. if strings.Contains(prompt, fmt.Sprintf("[img-%d]", i)) {
  1132. images = append(images, llm.ImageData{Data: img, ID: i})
  1133. }
  1134. i += 1
  1135. }
  1136. }
  1137. slog.Debug("chat handler", "prompt", prompt, "images", len(images))
  1138. ch := make(chan any)
  1139. go func() {
  1140. defer close(ch)
  1141. fn := func(r llm.CompletionResponse) {
  1142. resp := api.ChatResponse{
  1143. Model: req.Model,
  1144. CreatedAt: time.Now().UTC(),
  1145. Message: api.Message{Role: "assistant", Content: r.Content},
  1146. Done: r.Done,
  1147. DoneReason: r.DoneReason,
  1148. Metrics: api.Metrics{
  1149. PromptEvalCount: r.PromptEvalCount,
  1150. PromptEvalDuration: r.PromptEvalDuration,
  1151. EvalCount: r.EvalCount,
  1152. EvalDuration: r.EvalDuration,
  1153. },
  1154. }
  1155. if r.Done {
  1156. resp.TotalDuration = time.Since(checkpointStart)
  1157. resp.LoadDuration = checkpointLoaded.Sub(checkpointStart)
  1158. }
  1159. ch <- resp
  1160. }
  1161. if err := runner.llama.Completion(c.Request.Context(), llm.CompletionRequest{
  1162. Prompt: prompt,
  1163. Format: req.Format,
  1164. Images: images,
  1165. Options: opts,
  1166. }, fn); err != nil {
  1167. ch <- gin.H{"error": err.Error()}
  1168. }
  1169. }()
  1170. if req.Stream != nil && !*req.Stream {
  1171. // Accumulate responses into the final response
  1172. var final api.ChatResponse
  1173. var sb strings.Builder
  1174. for resp := range ch {
  1175. switch r := resp.(type) {
  1176. case api.ChatResponse:
  1177. sb.WriteString(r.Message.Content)
  1178. final = r
  1179. case gin.H:
  1180. if errorMsg, ok := r["error"].(string); ok {
  1181. c.JSON(http.StatusInternalServerError, gin.H{"error": errorMsg})
  1182. return
  1183. } else {
  1184. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error format in response"})
  1185. return
  1186. }
  1187. default:
  1188. c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected error"})
  1189. return
  1190. }
  1191. }
  1192. final.Message = api.Message{Role: "assistant", Content: sb.String()}
  1193. c.JSON(http.StatusOK, final)
  1194. return
  1195. }
  1196. streamResponse(c, ch)
  1197. }
  1198. func handleErrorResponse(c *gin.Context, err error) {
  1199. if errors.Is(err, context.Canceled) {
  1200. c.JSON(499, gin.H{"error": "request canceled"})
  1201. return
  1202. }
  1203. if errors.Is(err, ErrMaxQueue) {
  1204. c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
  1205. return
  1206. }
  1207. c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
  1208. }