model.go 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. package server
  2. import (
  3. "archive/zip"
  4. "bytes"
  5. "cmp"
  6. "context"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "log/slog"
  11. "net/http"
  12. "os"
  13. "path/filepath"
  14. "slices"
  15. "github.com/ollama/ollama/api"
  16. "github.com/ollama/ollama/convert"
  17. "github.com/ollama/ollama/llm"
  18. "github.com/ollama/ollama/template"
  19. "github.com/ollama/ollama/types/model"
  20. )
  21. var intermediateBlobs map[string]string = make(map[string]string)
// layerGGML pairs a layer with the GGML metadata decoded from its contents.
//
// The embedded *llm.GGML is nil for layers that were not decoded, e.g. the
// template layers appended by detectChatTemplate and the non-model layers
// returned by parseFromModel.
type layerGGML struct {
	*Layer
	*llm.GGML
}
  26. func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  27. m, err := ParseNamedManifest(name)
  28. switch {
  29. case errors.Is(err, os.ErrNotExist):
  30. if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
  31. return nil, err
  32. }
  33. m, err = ParseNamedManifest(name)
  34. if err != nil {
  35. return nil, err
  36. }
  37. case err != nil:
  38. return nil, err
  39. }
  40. for _, layer := range m.Layers {
  41. layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
  42. if err != nil {
  43. return nil, err
  44. }
  45. switch layer.MediaType {
  46. case "application/vnd.ollama.image.model",
  47. "application/vnd.ollama.image.projector",
  48. "application/vnd.ollama.image.adapter":
  49. blobpath, err := GetBlobsPath(layer.Digest)
  50. if err != nil {
  51. return nil, err
  52. }
  53. blob, err := os.Open(blobpath)
  54. if err != nil {
  55. return nil, err
  56. }
  57. defer blob.Close()
  58. ggml, _, err := llm.DecodeGGML(blob, 0)
  59. if err != nil {
  60. return nil, err
  61. }
  62. layers = append(layers, &layerGGML{layer, ggml})
  63. default:
  64. layers = append(layers, &layerGGML{layer, nil})
  65. }
  66. }
  67. return layers, nil
  68. }
  69. func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error {
  70. stat, err := file.Stat()
  71. if err != nil {
  72. return err
  73. }
  74. r, err := zip.NewReader(file, stat.Size())
  75. if err != nil {
  76. return err
  77. }
  78. fn(api.ProgressResponse{Status: "unpacking model metadata"})
  79. for _, f := range r.File {
  80. if !filepath.IsLocal(f.Name) {
  81. return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name)
  82. }
  83. n := filepath.Join(p, f.Name)
  84. if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil {
  85. return err
  86. }
  87. // TODO(mxyng): this should not write out all files to disk
  88. outfile, err := os.Create(n)
  89. if err != nil {
  90. return err
  91. }
  92. defer outfile.Close()
  93. infile, err := f.Open()
  94. if err != nil {
  95. return err
  96. }
  97. defer infile.Close()
  98. if _, err = io.Copy(outfile, infile); err != nil {
  99. return err
  100. }
  101. if err := outfile.Close(); err != nil {
  102. return err
  103. }
  104. if err := infile.Close(); err != nil {
  105. return err
  106. }
  107. }
  108. return nil
  109. }
  110. func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  111. tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
  112. if err != nil {
  113. return nil, err
  114. }
  115. defer os.RemoveAll(tempDir)
  116. if err := extractFromZipFile(tempDir, file, fn); err != nil {
  117. return nil, err
  118. }
  119. mf, err := convert.GetModelFormat(tempDir)
  120. if err != nil {
  121. return nil, err
  122. }
  123. params, err := mf.GetParams(tempDir)
  124. if err != nil {
  125. return nil, err
  126. }
  127. mArch, err := mf.GetModelArch("", tempDir, params)
  128. if err != nil {
  129. return nil, err
  130. }
  131. fn(api.ProgressResponse{Status: "processing tensors"})
  132. if err := mArch.GetTensors(); err != nil {
  133. return nil, err
  134. }
  135. if err := mArch.LoadVocab(); err != nil {
  136. return nil, err
  137. }
  138. fn(api.ProgressResponse{Status: "converting model"})
  139. // TODO(mxyng): this should write directly into a layer
  140. // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
  141. temp, err := os.CreateTemp(tempDir, "fp16")
  142. if err != nil {
  143. return nil, err
  144. }
  145. defer temp.Close()
  146. defer os.Remove(temp.Name())
  147. if err = mArch.WriteGGUF(temp); err != nil {
  148. return nil, err
  149. }
  150. if _, err := temp.Seek(0, io.SeekStart); err != nil {
  151. return nil, err
  152. }
  153. layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
  154. if err != nil {
  155. return nil, err
  156. }
  157. bin, err := layer.Open()
  158. if err != nil {
  159. return nil, err
  160. }
  161. defer bin.Close()
  162. ggml, _, err := llm.DecodeGGML(bin, 0)
  163. if err != nil {
  164. return nil, err
  165. }
  166. layers = append(layers, &layerGGML{layer, ggml})
  167. intermediateBlobs[digest] = layer.Digest
  168. return detectChatTemplate(layers)
  169. }
  170. func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  171. sr := io.NewSectionReader(file, 0, 512)
  172. contentType, err := detectContentType(sr)
  173. if err != nil {
  174. return nil, err
  175. }
  176. switch contentType {
  177. case "gguf", "ggla":
  178. // noop
  179. case "application/zip":
  180. return parseFromZipFile(ctx, file, digest, fn)
  181. default:
  182. return nil, fmt.Errorf("unsupported content type: %s", contentType)
  183. }
  184. stat, err := file.Stat()
  185. if err != nil {
  186. return nil, err
  187. }
  188. var offset int64
  189. for offset < stat.Size() {
  190. ggml, n, err := llm.DecodeGGML(file, 0)
  191. if errors.Is(err, io.EOF) {
  192. break
  193. } else if err != nil {
  194. return nil, err
  195. }
  196. mediatype := "application/vnd.ollama.image.model"
  197. if ggml.Name() == "ggla" {
  198. mediatype = "application/vnd.ollama.image.adapter"
  199. } else if ggml.KV().Architecture() == "clip" {
  200. mediatype = "application/vnd.ollama.image.projector"
  201. }
  202. var reader io.Reader = io.NewSectionReader(file, offset, n)
  203. if !slices.IsSortedFunc(ggml.Tensors(), func(a, b *llm.Tensor) int {
  204. var i, j int
  205. if n, err := fmt.Sscanf(a.Name, "blk.%d", &i); err != nil || n != 1 {
  206. return cmp.Compare(a.Name, b.Name)
  207. } else if n, err := fmt.Sscanf(b.Name, "blk.%d", &j); err != nil || n != 1 {
  208. return cmp.Compare(a.Name, b.Name)
  209. }
  210. return cmp.Compare(i, j)
  211. }) {
  212. reader = &llm.GGUFWriter{
  213. KV: ggml.KV(),
  214. T: ggml.Tensors(),
  215. }
  216. }
  217. layer, err := NewLayer(reader, mediatype)
  218. if err != nil {
  219. return nil, err
  220. }
  221. layers = append(layers, &layerGGML{layer, ggml})
  222. offset = n
  223. }
  224. return detectChatTemplate(layers)
  225. }
  226. func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
  227. for _, layer := range layers {
  228. if s := layer.GGML.KV().ChatTemplate(); s != "" {
  229. if t, err := template.Named(s); err != nil {
  230. slog.Debug("template detection", "error", err)
  231. } else {
  232. tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
  233. if err != nil {
  234. return nil, err
  235. }
  236. tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
  237. layers = append(layers, &layerGGML{tmpl, nil})
  238. }
  239. }
  240. }
  241. return layers, nil
  242. }
  243. func detectContentType(r io.Reader) (string, error) {
  244. var b bytes.Buffer
  245. if _, err := io.Copy(&b, r); err != nil {
  246. return "", err
  247. }
  248. if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
  249. return contentType, nil
  250. }
  251. if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
  252. return contentType, nil
  253. }
  254. return "unknown", nil
  255. }