model.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. package server
  2. import (
  3. "archive/zip"
  4. "bytes"
  5. "context"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "log/slog"
  10. "net/http"
  11. "os"
  12. "path/filepath"
  13. "sort"
  14. "github.com/ollama/ollama/api"
  15. "github.com/ollama/ollama/convert"
  16. "github.com/ollama/ollama/llm"
  17. "github.com/ollama/ollama/template"
  18. "github.com/ollama/ollama/types/model"
  19. )
  20. var intermediateBlobs map[string]string = make(map[string]string)
// layerGGML pairs a manifest Layer with the decoded GGML metadata of its
// blob. GGML is nil for layers whose media type is not model, projector,
// or adapter (see parseFromModel's default case) and for autodetected
// template layers (see detectChatTemplate), so callers must nil-check
// before using it.
type layerGGML struct {
	*Layer
	*llm.GGML
}
  25. func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  26. m, err := ParseNamedManifest(name)
  27. switch {
  28. case errors.Is(err, os.ErrNotExist):
  29. if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
  30. return nil, err
  31. }
  32. m, err = ParseNamedManifest(name)
  33. if err != nil {
  34. return nil, err
  35. }
  36. case err != nil:
  37. return nil, err
  38. }
  39. for _, layer := range m.Layers {
  40. layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
  41. if err != nil {
  42. return nil, err
  43. }
  44. switch layer.MediaType {
  45. case "application/vnd.ollama.image.model",
  46. "application/vnd.ollama.image.projector",
  47. "application/vnd.ollama.image.adapter":
  48. blobpath, err := GetBlobsPath(layer.Digest)
  49. if err != nil {
  50. return nil, err
  51. }
  52. blob, err := os.Open(blobpath)
  53. if err != nil {
  54. return nil, err
  55. }
  56. defer blob.Close()
  57. ggml, _, err := llm.DecodeGGML(blob, 0)
  58. if err != nil {
  59. return nil, err
  60. }
  61. layers = append(layers, &layerGGML{layer, ggml})
  62. default:
  63. layers = append(layers, &layerGGML{layer, nil})
  64. }
  65. }
  66. return layers, nil
  67. }
  68. func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error {
  69. stat, err := file.Stat()
  70. if err != nil {
  71. return err
  72. }
  73. r, err := zip.NewReader(file, stat.Size())
  74. if err != nil {
  75. return err
  76. }
  77. fn(api.ProgressResponse{Status: "unpacking model metadata"})
  78. for _, f := range r.File {
  79. if !filepath.IsLocal(f.Name) {
  80. return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name)
  81. }
  82. n := filepath.Join(p, f.Name)
  83. if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil {
  84. return err
  85. }
  86. // TODO(mxyng): this should not write out all files to disk
  87. outfile, err := os.Create(n)
  88. if err != nil {
  89. return err
  90. }
  91. defer outfile.Close()
  92. infile, err := f.Open()
  93. if err != nil {
  94. return err
  95. }
  96. defer infile.Close()
  97. if _, err = io.Copy(outfile, infile); err != nil {
  98. return err
  99. }
  100. if err := outfile.Close(); err != nil {
  101. return err
  102. }
  103. if err := infile.Close(); err != nil {
  104. return err
  105. }
  106. }
  107. return nil
  108. }
  109. func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  110. tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
  111. if err != nil {
  112. return nil, err
  113. }
  114. defer os.RemoveAll(tempDir)
  115. if err := extractFromZipFile(tempDir, file, fn); err != nil {
  116. return nil, err
  117. }
  118. mf, err := convert.GetModelFormat(tempDir)
  119. if err != nil {
  120. return nil, err
  121. }
  122. params, err := mf.GetParams(tempDir)
  123. if err != nil {
  124. return nil, err
  125. }
  126. mArch, err := mf.GetModelArch("", tempDir, params)
  127. if err != nil {
  128. return nil, err
  129. }
  130. fn(api.ProgressResponse{Status: "processing tensors"})
  131. if err := mArch.GetTensors(); err != nil {
  132. return nil, err
  133. }
  134. if err := mArch.LoadVocab(); err != nil {
  135. return nil, err
  136. }
  137. fn(api.ProgressResponse{Status: "converting model"})
  138. // TODO(mxyng): this should write directly into a layer
  139. // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
  140. temp, err := os.CreateTemp(tempDir, "fp16")
  141. if err != nil {
  142. return nil, err
  143. }
  144. defer temp.Close()
  145. defer os.Remove(temp.Name())
  146. if err = mArch.WriteGGUF(temp); err != nil {
  147. return nil, err
  148. }
  149. if _, err := temp.Seek(0, io.SeekStart); err != nil {
  150. return nil, err
  151. }
  152. layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
  153. if err != nil {
  154. return nil, err
  155. }
  156. bin, err := layer.Open()
  157. if err != nil {
  158. return nil, err
  159. }
  160. defer bin.Close()
  161. ggml, _, err := llm.DecodeGGML(bin, 0)
  162. if err != nil {
  163. return nil, err
  164. }
  165. layers = append(layers, &layerGGML{layer, ggml})
  166. intermediateBlobs[digest] = layer.Digest
  167. return detectChatTemplate(layers)
  168. }
// parseFromFile decodes the GGML container(s) in file into model layers.
// Zip archives are delegated to parseFromZipFile; raw gguf/ggla payloads
// are decoded in place, possibly yielding several layers when the file
// concatenates multiple containers.
func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
	// Only the first 512 bytes are needed to sniff the content type
	// (http.DetectContentType considers at most 512 bytes).
	sr := io.NewSectionReader(file, 0, 512)
	contentType, err := detectContentType(sr)
	if err != nil {
		return nil, err
	}

	switch contentType {
	case "gguf", "ggla":
		// noop
	case "application/zip":
		return parseFromZipFile(ctx, file, digest, fn)
	default:
		return nil, fmt.Errorf("unsupported content type: %s", contentType)
	}

	stat, err := file.Stat()
	if err != nil {
		return nil, err
	}

	var offset int64
	for offset < stat.Size() {
		ggml, n, err := llm.DecodeGGML(file, -1)
		if errors.Is(err, io.EOF) {
			break
		} else if err != nil {
			return nil, err
		}

		// Choose the layer media type from the decoded container: ggla
		// containers are adapters, clip architectures are projectors,
		// everything else is a model.
		mediatype := "application/vnd.ollama.image.model"
		if ggml.Name() == "ggla" {
			mediatype = "application/vnd.ollama.image.adapter"
		} else if ggml.KV().Architecture() == "clip" {
			mediatype = "application/vnd.ollama.image.projector"
		}

		var reader io.Reader = io.NewSectionReader(file, offset, n)
		if !sort.IsSorted(ggml.Tensors()) {
			// Tensors are out of order: rewrite the container with a
			// GGUFWriter instead of copying the raw bytes.
			// create a new Tensors containing Tensors that have a writeTo
			var tensors llm.Tensors
			for _, tensor := range ggml.Tensors() {
				// Reverse the dimension order of the shape — presumably to
				// match GGUF's on-disk dimension layout; TODO(review) confirm.
				shape := make([]uint64, len(tensor.Shape))
				for i := range len(tensor.Shape) {
					shape[i] = tensor.Shape[len(tensor.Shape)-i-1]
				}

				tensors = append(tensors, &llm.Tensor{
					Name:  tensor.Name,
					Kind:  tensor.Kind,
					Shape: shape,
					WriterTo: &llm.TensorWriter{
						// This needs offset + tensors.Offset int64(tensor.Offset) to be correct
						// NOTE(review): per the comment above, this section's
						// base offset looks wrong — verify against
						// llm.TensorWriter before relying on rewritten output.
						Reader: io.NewSectionReader(file, int64(tensor.Offset), int64(tensor.Size())),
					},
				})
			}

			reader = &llm.GGUFWriter{
				KV: ggml.KV(),
				// Update .Tensors
				Tensors: tensors,
			}
		}

		layer, err := NewLayer(reader, mediatype)
		if err != nil {
			return nil, err
		}

		layers = append(layers, &layerGGML{layer, ggml})
		// NOTE(review): n is treated as the absolute end offset of this
		// container (offset = n, not offset += n) — confirm against
		// llm.DecodeGGML's contract; a relative byte count here would
		// misplace every container after the first.
		offset = n
	}

	return detectChatTemplate(layers)
}
  235. func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
  236. for _, layer := range layers {
  237. if s := layer.GGML.KV().ChatTemplate(); s != "" {
  238. if t, err := template.Named(s); err != nil {
  239. slog.Debug("template detection", "error", err)
  240. } else {
  241. tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
  242. if err != nil {
  243. return nil, err
  244. }
  245. tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
  246. layers = append(layers, &layerGGML{tmpl, nil})
  247. }
  248. }
  249. }
  250. return layers, nil
  251. }
  252. func detectContentType(r io.Reader) (string, error) {
  253. var b bytes.Buffer
  254. if _, err := io.Copy(&b, r); err != nil {
  255. return "", err
  256. }
  257. if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
  258. return contentType, nil
  259. }
  260. if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
  261. return contentType, nil
  262. }
  263. return "unknown", nil
  264. }