model.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. package server
  2. import (
  3. "archive/zip"
  4. "bytes"
  5. "context"
  6. "encoding/json"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "log/slog"
  11. "net/http"
  12. "os"
  13. "path/filepath"
  14. "slices"
  15. "strings"
  16. "text/template/parse"
  17. "github.com/ollama/ollama/api"
  18. "github.com/ollama/ollama/convert"
  19. "github.com/ollama/ollama/llm"
  20. "github.com/ollama/ollama/template"
  21. "github.com/ollama/ollama/types/model"
  22. )
  23. var intermediateBlobs map[string]string = make(map[string]string)
  24. type layerGGML struct {
  25. *Layer
  26. *llm.GGML
  27. }
  28. func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  29. m, err := ParseNamedManifest(name)
  30. switch {
  31. case errors.Is(err, os.ErrNotExist):
  32. if err := PullModel(ctx, name.String(), &registryOptions{}, fn); err != nil {
  33. return nil, err
  34. }
  35. m, err = ParseNamedManifest(name)
  36. if err != nil {
  37. return nil, err
  38. }
  39. case err != nil:
  40. return nil, err
  41. }
  42. for _, layer := range m.Layers {
  43. layer, err := NewLayerFromLayer(layer.Digest, layer.MediaType, name.DisplayShortest())
  44. if err != nil {
  45. return nil, err
  46. }
  47. switch layer.MediaType {
  48. case "application/vnd.ollama.image.model",
  49. "application/vnd.ollama.image.projector",
  50. "application/vnd.ollama.image.adapter":
  51. blobpath, err := GetBlobsPath(layer.Digest)
  52. if err != nil {
  53. return nil, err
  54. }
  55. blob, err := os.Open(blobpath)
  56. if err != nil {
  57. return nil, err
  58. }
  59. defer blob.Close()
  60. ggml, _, err := llm.DecodeGGML(blob, 0)
  61. if err != nil {
  62. return nil, err
  63. }
  64. layers = append(layers, &layerGGML{layer, ggml})
  65. default:
  66. layers = append(layers, &layerGGML{layer, nil})
  67. }
  68. }
  69. return layers, nil
  70. }
  71. func extractFromZipFile(p string, file *os.File, fn func(api.ProgressResponse)) error {
  72. stat, err := file.Stat()
  73. if err != nil {
  74. return err
  75. }
  76. r, err := zip.NewReader(file, stat.Size())
  77. if err != nil {
  78. return err
  79. }
  80. fn(api.ProgressResponse{Status: "unpacking model metadata"})
  81. for _, f := range r.File {
  82. if !filepath.IsLocal(f.Name) {
  83. return fmt.Errorf("%w: %s", zip.ErrInsecurePath, f.Name)
  84. }
  85. n := filepath.Join(p, f.Name)
  86. if err := os.MkdirAll(filepath.Dir(n), 0o750); err != nil {
  87. return err
  88. }
  89. // TODO(mxyng): this should not write out all files to disk
  90. outfile, err := os.Create(n)
  91. if err != nil {
  92. return err
  93. }
  94. defer outfile.Close()
  95. infile, err := f.Open()
  96. if err != nil {
  97. return err
  98. }
  99. defer infile.Close()
  100. if _, err = io.Copy(outfile, infile); err != nil {
  101. return err
  102. }
  103. if err := outfile.Close(); err != nil {
  104. return err
  105. }
  106. if err := infile.Close(); err != nil {
  107. return err
  108. }
  109. }
  110. return nil
  111. }
  112. func parseFromZipFile(_ context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  113. tempDir, err := os.MkdirTemp(filepath.Dir(file.Name()), "")
  114. if err != nil {
  115. return nil, err
  116. }
  117. defer os.RemoveAll(tempDir)
  118. if err := extractFromZipFile(tempDir, file, fn); err != nil {
  119. return nil, err
  120. }
  121. mf, err := convert.GetModelFormat(tempDir)
  122. if err != nil {
  123. return nil, err
  124. }
  125. params, err := mf.GetParams(tempDir)
  126. if err != nil {
  127. return nil, err
  128. }
  129. mArch, err := mf.GetModelArch("", tempDir, params)
  130. if err != nil {
  131. return nil, err
  132. }
  133. fn(api.ProgressResponse{Status: "processing tensors"})
  134. if err := mArch.GetTensors(); err != nil {
  135. return nil, err
  136. }
  137. if err := mArch.LoadVocab(); err != nil {
  138. return nil, err
  139. }
  140. fn(api.ProgressResponse{Status: "converting model"})
  141. // TODO(mxyng): this should write directly into a layer
  142. // e.g. NewLayer(arch.Reader(), "application/vnd.ollama.image.model")
  143. temp, err := os.CreateTemp(tempDir, "fp16")
  144. if err != nil {
  145. return nil, err
  146. }
  147. defer temp.Close()
  148. defer os.Remove(temp.Name())
  149. if err = mArch.WriteGGUF(temp); err != nil {
  150. return nil, err
  151. }
  152. if _, err := temp.Seek(0, io.SeekStart); err != nil {
  153. return nil, err
  154. }
  155. layer, err := NewLayer(temp, "application/vnd.ollama.image.model")
  156. if err != nil {
  157. return nil, err
  158. }
  159. bin, err := layer.Open()
  160. if err != nil {
  161. return nil, err
  162. }
  163. defer bin.Close()
  164. ggml, _, err := llm.DecodeGGML(bin, 0)
  165. if err != nil {
  166. return nil, err
  167. }
  168. layers = append(layers, &layerGGML{layer, ggml})
  169. intermediateBlobs[digest] = layer.Digest
  170. return detectChatTemplate(layers)
  171. }
  172. func parseFromFile(ctx context.Context, file *os.File, digest string, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
  173. sr := io.NewSectionReader(file, 0, 512)
  174. contentType, err := detectContentType(sr)
  175. if err != nil {
  176. return nil, err
  177. }
  178. switch contentType {
  179. case "gguf", "ggla":
  180. // noop
  181. case "application/zip":
  182. return parseFromZipFile(ctx, file, digest, fn)
  183. default:
  184. return nil, fmt.Errorf("unsupported content type: %s", contentType)
  185. }
  186. stat, err := file.Stat()
  187. if err != nil {
  188. return nil, err
  189. }
  190. var offset int64
  191. for offset < stat.Size() {
  192. ggml, n, err := llm.DecodeGGML(file, 0)
  193. if errors.Is(err, io.EOF) {
  194. break
  195. } else if err != nil {
  196. return nil, err
  197. }
  198. mediatype := "application/vnd.ollama.image.model"
  199. if ggml.Name() == "ggla" {
  200. mediatype = "application/vnd.ollama.image.adapter"
  201. } else if ggml.KV().Architecture() == "clip" {
  202. mediatype = "application/vnd.ollama.image.projector"
  203. }
  204. layer, err := NewLayer(io.NewSectionReader(file, offset, n), mediatype)
  205. if err != nil {
  206. return nil, err
  207. }
  208. layers = append(layers, &layerGGML{layer, ggml})
  209. offset = n
  210. }
  211. return detectChatTemplate(layers)
  212. }
  213. func detectChatTemplate(layers []*layerGGML) ([]*layerGGML, error) {
  214. for _, layer := range layers {
  215. if s := layer.GGML.KV().ChatTemplate(); s != "" {
  216. if t, err := template.Named(s); err != nil {
  217. slog.Debug("template detection", "error", err)
  218. } else {
  219. tmpl, err := NewLayer(t.Reader(), "application/vnd.ollama.image.template")
  220. if err != nil {
  221. return nil, err
  222. }
  223. tmpl.status = fmt.Sprintf("using autodetected template %s", t.Name)
  224. layers = append(layers, &layerGGML{tmpl, nil})
  225. }
  226. }
  227. }
  228. return layers, nil
  229. }
  230. func detectContentType(r io.Reader) (string, error) {
  231. var b bytes.Buffer
  232. if _, err := io.Copy(&b, r); err != nil {
  233. return "", err
  234. }
  235. if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
  236. return contentType, nil
  237. }
  238. if contentType := http.DetectContentType(b.Bytes()); contentType != "application/octet-stream" {
  239. return contentType, nil
  240. }
  241. return "unknown", nil
  242. }
  243. // parseToolCalls attempts to parse a JSON string into a slice of ToolCalls.
  244. // mxyng: this only really works if the input contains tool calls in some JSON format
  245. func (m *Model) parseToolCalls(s string) ([]api.ToolCall, bool) {
  246. // create a subtree from the node that ranges over .ToolCalls
  247. tmpl := m.Template.Subtree(func(n parse.Node) bool {
  248. if t, ok := n.(*parse.RangeNode); ok {
  249. return slices.Contains(template.Identifiers(t.Pipe), "ToolCalls")
  250. }
  251. return false
  252. })
  253. if tmpl == nil {
  254. return nil, false
  255. }
  256. var b bytes.Buffer
  257. if err := tmpl.Execute(&b, map[string][]map[string]any{
  258. "ToolCalls": {
  259. {
  260. "Function": map[string]any{
  261. "Name": "@@name@@",
  262. "Arguments": "@@arguments@@",
  263. },
  264. },
  265. },
  266. }); err != nil {
  267. return nil, false
  268. }
  269. var kv map[string]string
  270. // execute the subtree with placeholders to identify the keys
  271. // trim any commands that might exist in the template
  272. if err := json.Unmarshal(bytes.TrimSuffix(b.Bytes(), []byte(",")), &kv); err != nil {
  273. return nil, false
  274. }
  275. // find the keys that correspond to the name and arguments fields
  276. var name, arguments string
  277. for k, v := range kv {
  278. switch v {
  279. case "@@name@@":
  280. name = k
  281. case "@@arguments@@":
  282. arguments = k
  283. }
  284. }
  285. var objs []map[string]any
  286. for offset := 0; offset < len(s); {
  287. if err := json.NewDecoder(strings.NewReader(s[offset:])).Decode(&objs); errors.Is(err, io.EOF) {
  288. break
  289. } else if syntax := &(json.SyntaxError{}); errors.As(err, &syntax) {
  290. // skip over any syntax errors
  291. offset += int(syntax.Offset)
  292. } else if unmarshalType := &(json.UnmarshalTypeError{}); errors.As(err, &unmarshalType) {
  293. // skip over any unmarshalable types
  294. offset += int(unmarshalType.Offset)
  295. } else if err != nil {
  296. return nil, false
  297. } else {
  298. // break when an object is decoded
  299. break
  300. }
  301. }
  302. var toolCalls []api.ToolCall
  303. for _, kv := range objs {
  304. var call api.ToolCall
  305. for k, v := range kv {
  306. switch k {
  307. case name:
  308. call.Function.Name = v.(string)
  309. case arguments:
  310. call.Function.Arguments = v.(map[string]any)
  311. }
  312. }
  313. toolCalls = append(toolCalls, call)
  314. }
  315. return toolCalls, len(toolCalls) > 0
  316. }