create.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. package server
  2. import (
  3. "bytes"
  4. "cmp"
  5. "context"
  6. "encoding/json"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "log/slog"
  11. "net/http"
  12. "os"
  13. "path/filepath"
  14. "slices"
  15. "strings"
  16. "github.com/gin-gonic/gin"
  17. "github.com/ollama/ollama/api"
  18. "github.com/ollama/ollama/convert"
  19. "github.com/ollama/ollama/envconfig"
  20. "github.com/ollama/ollama/format"
  21. "github.com/ollama/ollama/llama"
  22. "github.com/ollama/ollama/llm"
  23. "github.com/ollama/ollama/template"
  24. "github.com/ollama/ollama/types/errtypes"
  25. "github.com/ollama/ollama/types/model"
  26. )
  // Sentinel errors raised while turning a create request into model layers.
  // CreateHandler compares against them with errors.Is when deciding whether a
  // failure should be reported as a bad request.
  var (
  	// errNeitherFromOrFiles is reported when the request has no 'from' model
  	// and no 'files' map.
  	errNeitherFromOrFiles = errors.New("neither 'from' or 'files' was specified")

  	// errNoFilesProvided is reported when a files map contains no entries.
  	errNoFilesProvided = errors.New("no files provided to convert")

  	// errOnlyOneAdapterSupported is reported when more than one GGUF adapter
  	// file is supplied.
  	errOnlyOneAdapterSupported = errors.New("only one adapter is currently supported")

  	// errOnlyGGUFSupported is reported when a blob's detected content type is
  	// not "gguf".
  	errOnlyGGUFSupported = errors.New("supplied file was not in GGUF format")

  	// errUnknownType is reported when the model type cannot be detected from
  	// the supplied files.
  	errUnknownType = errors.New("unknown type")
  )
  34. func (s *Server) CreateHandler(c *gin.Context) {
  35. var r api.CreateRequest
  36. if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
  37. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
  38. return
  39. } else if err != nil {
  40. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  41. return
  42. }
  43. name := model.ParseName(cmp.Or(r.Model, r.Name))
  44. if !name.IsValid() {
  45. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
  46. return
  47. }
  48. name, err := getExistingName(name)
  49. if err != nil {
  50. c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
  51. return
  52. }
  53. ch := make(chan any)
  54. go func() {
  55. defer close(ch)
  56. fn := func(resp api.ProgressResponse) {
  57. ch <- resp
  58. }
  59. oldManifest, _ := ParseNamedManifest(name)
  60. var baseLayers []*layerGGML
  61. if r.From != "" {
  62. slog.Debug("create model from model name")
  63. fromName := model.ParseName(r.From)
  64. if !fromName.IsValid() {
  65. ch <- gin.H{"error": errtypes.InvalidModelNameErrMsg, "status": http.StatusBadRequest}
  66. return
  67. }
  68. ctx, cancel := context.WithCancel(c.Request.Context())
  69. defer cancel()
  70. baseLayers, err = parseFromModel(ctx, fromName, fn)
  71. if err != nil {
  72. ch <- gin.H{"error": err.Error()}
  73. }
  74. } else if r.Files != nil {
  75. baseLayers, err = convertModelFromFiles(r.Files, baseLayers, false, fn)
  76. if err != nil {
  77. for _, badReq := range []error{errNoFilesProvided, errOnlyGGUFSupported, errUnknownType} {
  78. if errors.Is(err, badReq) {
  79. ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
  80. return
  81. }
  82. }
  83. ch <- gin.H{"error": err.Error()}
  84. return
  85. }
  86. } else {
  87. ch <- gin.H{"error": errNeitherFromOrFiles.Error(), "status": http.StatusBadRequest}
  88. return
  89. }
  90. var adapterLayers []*layerGGML
  91. if r.Adapters != nil {
  92. adapterLayers, err = convertModelFromFiles(r.Adapters, baseLayers, true, fn)
  93. if err != nil {
  94. for _, badReq := range []error{errNoFilesProvided, errOnlyOneAdapterSupported, errOnlyGGUFSupported, errUnknownType} {
  95. if errors.Is(err, badReq) {
  96. ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
  97. return
  98. }
  99. }
  100. ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
  101. return
  102. }
  103. }
  104. if len(adapterLayers) > 0 {
  105. baseLayers = append(baseLayers, adapterLayers...)
  106. }
  107. if err := createModel(r, name, baseLayers, fn); err != nil {
  108. if errors.Is(err, errBadTemplate) {
  109. ch <- gin.H{"error": err.Error(), "status": http.StatusBadRequest}
  110. return
  111. }
  112. ch <- gin.H{"error": err.Error()}
  113. return
  114. }
  115. if !envconfig.NoPrune() && oldManifest != nil {
  116. if err := oldManifest.RemoveLayers(); err != nil {
  117. ch <- gin.H{"error": err.Error()}
  118. }
  119. }
  120. ch <- api.ProgressResponse{Status: "success"}
  121. }()
  122. if r.Stream != nil && !*r.Stream {
  123. waitForStream(c, ch)
  124. return
  125. }
  126. streamResponse(c, ch)
  127. }
  128. func convertModelFromFiles(files map[string]string, baseLayers []*layerGGML, isAdapter bool, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) {
  129. switch detectModelTypeFromFiles(files) {
  130. case "safetensors":
  131. layers, err := convertFromSafetensors(files, baseLayers, isAdapter, fn)
  132. if err != nil {
  133. slog.Error("error converting from safetensors", "error", err)
  134. return nil, err
  135. }
  136. return layers, nil
  137. case "gguf":
  138. if len(files) == 0 {
  139. return nil, errNoFilesProvided
  140. } else if len(files) > 1 && isAdapter {
  141. return nil, errOnlyOneAdapterSupported
  142. }
  143. var digest string
  144. var allLayers []*layerGGML
  145. for _, v := range files {
  146. digest = v
  147. layers, err := ggufLayers(digest, fn)
  148. if err != nil {
  149. return nil, err
  150. }
  151. allLayers = append(allLayers, layers...)
  152. }
  153. return allLayers, nil
  154. default:
  155. return nil, errUnknownType
  156. }
  157. }
  158. func detectModelTypeFromFiles(files map[string]string) string {
  159. for fn := range files {
  160. if strings.HasSuffix(fn, ".safetensors") {
  161. return "safetensors"
  162. } else if strings.HasSuffix(fn, ".gguf") {
  163. return "gguf"
  164. } else {
  165. // try to see if we can find a gguf file even without the file extension
  166. blobPath, err := GetBlobsPath(files[fn])
  167. if err != nil {
  168. slog.Error("error getting blobs path", "file", fn)
  169. return ""
  170. }
  171. f, err := os.Open(blobPath)
  172. if err != nil {
  173. slog.Error("error reading file", "error", err)
  174. return ""
  175. }
  176. defer f.Close()
  177. buf := make([]byte, 4)
  178. _, err = f.Read(buf)
  179. if err != nil {
  180. slog.Error("error reading file", "error", err)
  181. return ""
  182. }
  183. ct := llm.DetectGGMLType(buf)
  184. if ct == "gguf" {
  185. return "gguf"
  186. }
  187. }
  188. }
  189. return ""
  190. }
  191. func convertFromSafetensors(files map[string]string, baseLayers []*layerGGML, isAdapter bool, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) {
  192. tmpDir, err := os.MkdirTemp("", "ollama-safetensors")
  193. if err != nil {
  194. return nil, err
  195. }
  196. defer os.RemoveAll(tmpDir)
  197. for fp, digest := range files {
  198. blobPath, err := GetBlobsPath(digest)
  199. if err != nil {
  200. return nil, err
  201. }
  202. if err := createLink(blobPath, filepath.Join(tmpDir, fp)); err != nil {
  203. return nil, err
  204. }
  205. }
  206. t, err := os.CreateTemp(tmpDir, "fp16")
  207. if err != nil {
  208. return nil, err
  209. }
  210. defer t.Close()
  211. var mediaType string
  212. if !isAdapter {
  213. fn(api.ProgressResponse{Status: "converting model"})
  214. mediaType = "application/vnd.ollama.image.model"
  215. if err := convert.ConvertModel(os.DirFS(tmpDir), t); err != nil {
  216. return nil, err
  217. }
  218. } else {
  219. kv, err := kvFromLayers(baseLayers)
  220. if err != nil {
  221. return nil, err
  222. }
  223. fn(api.ProgressResponse{Status: "converting adapter"})
  224. mediaType = "application/vnd.ollama.image.adapter"
  225. if err := convert.ConvertAdapter(os.DirFS(tmpDir), t, kv); err != nil {
  226. return nil, err
  227. }
  228. }
  229. if _, err := t.Seek(0, io.SeekStart); err != nil {
  230. return nil, err
  231. }
  232. layer, err := NewLayer(t, mediaType)
  233. if err != nil {
  234. return nil, err
  235. }
  236. bin, err := layer.Open()
  237. if err != nil {
  238. return nil, err
  239. }
  240. ggml, _, err := llm.DecodeGGML(bin, 0)
  241. if err != nil {
  242. return nil, err
  243. }
  244. layers := []*layerGGML{{layer, ggml}}
  245. if !isAdapter {
  246. return detectChatTemplate(layers)
  247. }
  248. return layers, nil
  249. }
  250. func kvFromLayers(baseLayers []*layerGGML) (llm.KV, error) {
  251. for _, l := range baseLayers {
  252. if l.GGML != nil {
  253. return l.KV(), nil
  254. }
  255. }
  256. return llm.KV{}, fmt.Errorf("no base model was found")
  257. }
  258. func createModel(r api.CreateRequest, name model.Name, baseLayers []*layerGGML, fn func(resp api.ProgressResponse)) (err error) {
  259. config := ConfigV2{
  260. OS: "linux",
  261. Architecture: "amd64",
  262. RootFS: RootFS{
  263. Type: "layers",
  264. },
  265. }
  266. var layers []Layer
  267. for _, layer := range baseLayers {
  268. if layer.GGML != nil {
  269. quantType := strings.ToUpper(cmp.Or(r.Quantize, r.Quantization))
  270. if quantType != "" && layer.GGML.Name() == "gguf" && layer.MediaType == "application/vnd.ollama.image.model" {
  271. want, err := llm.ParseFileType(quantType)
  272. if err != nil {
  273. return err
  274. }
  275. ft := layer.GGML.KV().FileType()
  276. if !slices.Contains([]string{"F16", "F32"}, ft.String()) {
  277. return errors.New("quantization is only supported for F16 and F32 models")
  278. } else if ft != want {
  279. layer, err = quantizeLayer(layer, quantType, fn)
  280. if err != nil {
  281. return err
  282. }
  283. }
  284. }
  285. config.ModelFormat = cmp.Or(config.ModelFormat, layer.GGML.Name())
  286. config.ModelFamily = cmp.Or(config.ModelFamily, layer.GGML.KV().Architecture())
  287. config.ModelType = cmp.Or(config.ModelType, format.HumanNumber(layer.GGML.KV().ParameterCount()))
  288. config.FileType = cmp.Or(config.FileType, layer.GGML.KV().FileType().String())
  289. config.ModelFamilies = append(config.ModelFamilies, layer.GGML.KV().Architecture())
  290. }
  291. layers = append(layers, layer.Layer)
  292. }
  293. if r.Template != "" {
  294. layers, err = setTemplate(layers, r.Template)
  295. if err != nil {
  296. return err
  297. }
  298. }
  299. if r.System != "" {
  300. layers, err = setSystem(layers, r.System)
  301. if err != nil {
  302. return err
  303. }
  304. }
  305. if r.License != nil {
  306. switch l := r.License.(type) {
  307. case string:
  308. if l != "" {
  309. layers, err = setLicense(layers, l)
  310. if err != nil {
  311. return err
  312. }
  313. }
  314. case any:
  315. var licenses []string
  316. b, _ := json.Marshal(l) // re-marshal to JSON
  317. if err := json.Unmarshal(b, &licenses); err != nil {
  318. return err
  319. }
  320. for _, v := range licenses {
  321. layers, err = setLicense(layers, v)
  322. if err != nil {
  323. return err
  324. }
  325. }
  326. default:
  327. return fmt.Errorf("unknown license type: %T", l)
  328. }
  329. }
  330. layers, err = setParameters(layers, r.Parameters)
  331. if err != nil {
  332. return err
  333. }
  334. layers, err = setMessages(layers, r.Messages)
  335. if err != nil {
  336. return err
  337. }
  338. configLayer, err := createConfigLayer(layers, config)
  339. if err != nil {
  340. return err
  341. }
  342. for _, layer := range layers {
  343. if layer.status != "" {
  344. fn(api.ProgressResponse{Status: layer.status})
  345. }
  346. }
  347. fn(api.ProgressResponse{Status: "writing manifest"})
  348. if err := WriteManifest(name, *configLayer, layers); err != nil {
  349. return err
  350. }
  351. return nil
  352. }
  353. func quantizeLayer(layer *layerGGML, quantizeType string, fn func(resp api.ProgressResponse)) (*layerGGML, error) {
  354. ft := layer.GGML.KV().FileType()
  355. fn(api.ProgressResponse{Status: fmt.Sprintf("quantizing %s model to %s", ft, quantizeType)})
  356. want, err := llm.ParseFileType(quantizeType)
  357. if err != nil {
  358. return nil, err
  359. }
  360. blob, err := GetBlobsPath(layer.Digest)
  361. if err != nil {
  362. return nil, err
  363. }
  364. temp, err := os.CreateTemp(filepath.Dir(blob), quantizeType)
  365. if err != nil {
  366. return nil, err
  367. }
  368. defer temp.Close()
  369. defer os.Remove(temp.Name())
  370. if err := llama.Quantize(blob, temp.Name(), uint32(want)); err != nil {
  371. return nil, err
  372. }
  373. newLayer, err := NewLayer(temp, layer.MediaType)
  374. if err != nil {
  375. return nil, err
  376. }
  377. if _, err := temp.Seek(0, io.SeekStart); err != nil {
  378. return nil, err
  379. }
  380. ggml, _, err := llm.DecodeGGML(temp, 0)
  381. if err != nil {
  382. slog.Error(fmt.Sprintf("error decoding ggml: %s\n", err))
  383. return nil, err
  384. }
  385. return &layerGGML{newLayer, ggml}, nil
  386. }
  387. func ggufLayers(digest string, fn func(resp api.ProgressResponse)) ([]*layerGGML, error) {
  388. var layers []*layerGGML
  389. fn(api.ProgressResponse{Status: "parsing GGUF"})
  390. blobPath, err := GetBlobsPath(digest)
  391. if err != nil {
  392. return nil, err
  393. }
  394. blob, err := os.Open(blobPath)
  395. if err != nil {
  396. return nil, err
  397. }
  398. defer blob.Close()
  399. sr := io.NewSectionReader(blob, 0, 512)
  400. contentType, err := detectContentType(sr)
  401. if err != nil {
  402. return nil, err
  403. }
  404. if contentType != "gguf" {
  405. slog.Error(fmt.Sprintf("unsupported content type: %s", contentType))
  406. return nil, errOnlyGGUFSupported
  407. }
  408. stat, err := blob.Stat()
  409. if err != nil {
  410. return nil, err
  411. }
  412. var offset int64
  413. for offset < stat.Size() {
  414. ggml, n, err := llm.DecodeGGML(blob, 0)
  415. if errors.Is(err, io.EOF) {
  416. break
  417. } else if err != nil {
  418. return nil, err
  419. }
  420. mediatype := "application/vnd.ollama.image.model"
  421. if ggml.KV().Kind() == "adapter" {
  422. mediatype = "application/vnd.ollama.image.adapter"
  423. } else if _, ok := ggml.KV()[fmt.Sprintf("%s.vision.block_count", ggml.KV().Architecture())]; ok || ggml.KV().Kind() == "projector" {
  424. mediatype = "application/vnd.ollama.image.projector"
  425. }
  426. var layer Layer
  427. if digest != "" && n == stat.Size() && offset == 0 {
  428. layer, err = NewLayerFromLayer(digest, mediatype, blob.Name())
  429. if err != nil {
  430. slog.Debug("could not create new layer from layer", "error", err)
  431. return nil, err
  432. }
  433. }
  434. // Fallback to creating layer from file copy (either NewLayerFromLayer failed, or digest empty/n != stat.Size())
  435. if layer.Digest == "" {
  436. layer, err = NewLayer(io.NewSectionReader(blob, offset, n), mediatype)
  437. if err != nil {
  438. return nil, err
  439. }
  440. }
  441. layers = append(layers, &layerGGML{layer, ggml})
  442. offset = n
  443. }
  444. return detectChatTemplate(layers)
  445. }
  446. func removeLayer(layers []Layer, mediatype string) []Layer {
  447. return slices.DeleteFunc(layers, func(layer Layer) bool {
  448. if layer.MediaType != mediatype {
  449. return false
  450. }
  451. if err := layer.Remove(); err != nil {
  452. slog.Warn("couldn't remove blob", "digest", layer.Digest, "error", err)
  453. return true
  454. }
  455. return true
  456. })
  457. }
  458. func setTemplate(layers []Layer, t string) ([]Layer, error) {
  459. layers = removeLayer(layers, "application/vnd.ollama.image.template")
  460. if _, err := template.Parse(t); err != nil {
  461. return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
  462. }
  463. if _, err := template.Parse(t); err != nil {
  464. return nil, fmt.Errorf("%w: %s", errBadTemplate, err)
  465. }
  466. blob := strings.NewReader(t)
  467. layer, err := NewLayer(blob, "application/vnd.ollama.image.template")
  468. if err != nil {
  469. return nil, err
  470. }
  471. layers = append(layers, layer)
  472. return layers, nil
  473. }
  474. func setSystem(layers []Layer, s string) ([]Layer, error) {
  475. layers = removeLayer(layers, "application/vnd.ollama.image.system")
  476. if s != "" {
  477. blob := strings.NewReader(s)
  478. layer, err := NewLayer(blob, "application/vnd.ollama.image.system")
  479. if err != nil {
  480. return nil, err
  481. }
  482. layers = append(layers, layer)
  483. }
  484. return layers, nil
  485. }
  486. func setLicense(layers []Layer, l string) ([]Layer, error) {
  487. blob := strings.NewReader(l)
  488. layer, err := NewLayer(blob, "application/vnd.ollama.image.license")
  489. if err != nil {
  490. return nil, err
  491. }
  492. layers = append(layers, layer)
  493. return layers, nil
  494. }
  495. func setParameters(layers []Layer, p map[string]any) ([]Layer, error) {
  496. if p == nil {
  497. p = make(map[string]any)
  498. }
  499. for _, layer := range layers {
  500. if layer.MediaType != "application/vnd.ollama.image.params" {
  501. continue
  502. }
  503. digestPath, err := GetBlobsPath(layer.Digest)
  504. if err != nil {
  505. return nil, err
  506. }
  507. fn, err := os.Open(digestPath)
  508. if err != nil {
  509. return nil, err
  510. }
  511. defer fn.Close()
  512. var existing map[string]any
  513. if err := json.NewDecoder(fn).Decode(&existing); err != nil {
  514. return nil, err
  515. }
  516. for k, v := range existing {
  517. if _, exists := p[k]; exists {
  518. continue
  519. }
  520. p[k] = v
  521. }
  522. }
  523. if len(p) == 0 {
  524. return layers, nil
  525. }
  526. layers = removeLayer(layers, "application/vnd.ollama.image.params")
  527. var b bytes.Buffer
  528. if err := json.NewEncoder(&b).Encode(p); err != nil {
  529. return nil, err
  530. }
  531. layer, err := NewLayer(&b, "application/vnd.ollama.image.params")
  532. if err != nil {
  533. return nil, err
  534. }
  535. layers = append(layers, layer)
  536. return layers, nil
  537. }
  538. func setMessages(layers []Layer, m []api.Message) ([]Layer, error) {
  539. // this leaves the old messages intact if no new messages were specified
  540. // which may not be the correct behaviour
  541. if len(m) == 0 {
  542. return layers, nil
  543. }
  544. fmt.Printf("removing old messages\n")
  545. layers = removeLayer(layers, "application/vnd.ollama.image.messages")
  546. var b bytes.Buffer
  547. if err := json.NewEncoder(&b).Encode(m); err != nil {
  548. return nil, err
  549. }
  550. layer, err := NewLayer(&b, "application/vnd.ollama.image.messages")
  551. if err != nil {
  552. return nil, err
  553. }
  554. layers = append(layers, layer)
  555. return layers, nil
  556. }
  557. func createConfigLayer(layers []Layer, config ConfigV2) (*Layer, error) {
  558. digests := make([]string, len(layers))
  559. for i, layer := range layers {
  560. digests[i] = layer.Digest
  561. }
  562. config.RootFS.DiffIDs = digests
  563. var b bytes.Buffer
  564. if err := json.NewEncoder(&b).Encode(config); err != nil {
  565. return nil, err
  566. }
  567. layer, err := NewLayer(&b, "application/vnd.docker.container.image.v1+json")
  568. if err != nil {
  569. return nil, err
  570. }
  571. return &layer, nil
  572. }
  573. func createLink(src, dst string) error {
  574. // make any subdirs for dst
  575. if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
  576. return err
  577. }
  578. _ = os.Remove(dst)
  579. if err := os.Symlink(src, dst); err != nil {
  580. if err := copyFile(src, dst); err != nil {
  581. return err
  582. }
  583. }
  584. return nil
  585. }
  // copyFile copies the contents of the file at src into dst, creating dst or
  // truncating it if it already exists. It returns the first error encountered,
  // including one reported when dst is closed, so a failed flush is not
  // mistaken for a successful copy.
  func copyFile(src, dst string) error {
  	srcFile, err := os.Open(src)
  	if err != nil {
  		return err
  	}
  	defer srcFile.Close()

  	dstFile, err := os.Create(dst)
  	if err != nil {
  		return err
  	}

  	if _, err := io.Copy(dstFile, srcFile); err != nil {
  		// The copy error is the one worth reporting; still release the handle.
  		dstFile.Close()
  		return err
  	}

  	return dstFile.Close()
  }