parser.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "crypto/sha256"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "log/slog"
  10. "net/http"
  11. "os"
  12. "os/user"
  13. "path/filepath"
  14. "slices"
  15. "strconv"
  16. "strings"
  17. "golang.org/x/text/encoding/unicode"
  18. "golang.org/x/text/transform"
  19. "github.com/ollama/ollama/api"
  20. )
// ErrModelNotFound is returned by filesForModel when none of the model weight
// file patterns it tries match anything in the given directory.
var ErrModelNotFound = errors.New("no Modelfile or safetensors files found")
// Modelfile is the parsed form of a Modelfile: the list of commands it
// contains.
type Modelfile struct {
	// Commands holds the commands in the order ParseFile appended them.
	Commands []Command
}
  25. func (f Modelfile) String() string {
  26. var sb strings.Builder
  27. for _, cmd := range f.Commands {
  28. fmt.Fprintln(&sb, cmd.String())
  29. }
  30. return sb.String()
  31. }
// deprecatedParameters lists parameter names that CreateRequest no longer
// forwards: it prints a warning for them and skips the command.
var deprecatedParameters = []string{"penalize_newline"}
  33. // CreateRequest creates a new *api.CreateRequest from an existing Modelfile
  34. func (f Modelfile) CreateRequest(relativeDir string) (*api.CreateRequest, error) {
  35. req := &api.CreateRequest{}
  36. var messages []api.Message
  37. var licenses []string
  38. params := make(map[string]any)
  39. for _, c := range f.Commands {
  40. switch c.Name {
  41. case "model":
  42. path, err := expandPath(c.Args, relativeDir)
  43. if err != nil {
  44. return nil, err
  45. }
  46. digestMap, err := fileDigestMap(path)
  47. if errors.Is(err, os.ErrNotExist) {
  48. req.From = c.Args
  49. continue
  50. } else if err != nil {
  51. return nil, err
  52. }
  53. if req.Files == nil {
  54. req.Files = digestMap
  55. } else {
  56. for k, v := range digestMap {
  57. req.Files[k] = v
  58. }
  59. }
  60. case "adapter":
  61. path, err := expandPath(c.Args, relativeDir)
  62. if err != nil {
  63. return nil, err
  64. }
  65. digestMap, err := fileDigestMap(path)
  66. if err != nil {
  67. return nil, err
  68. }
  69. req.Adapters = digestMap
  70. case "template":
  71. req.Template = c.Args
  72. case "system":
  73. req.System = c.Args
  74. case "license":
  75. licenses = append(licenses, c.Args)
  76. case "message":
  77. role, msg, _ := strings.Cut(c.Args, ": ")
  78. messages = append(messages, api.Message{Role: role, Content: msg})
  79. default:
  80. if slices.Contains(deprecatedParameters, c.Name) {
  81. fmt.Printf("warning: parameter %s is deprecated\n", c.Name)
  82. break
  83. }
  84. ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}})
  85. if err != nil {
  86. return nil, err
  87. }
  88. for k, v := range ps {
  89. if ks, ok := params[k].([]string); ok {
  90. params[k] = append(ks, v.([]string)...)
  91. } else if vs, ok := v.([]string); ok {
  92. params[k] = vs
  93. } else {
  94. params[k] = v
  95. }
  96. }
  97. }
  98. }
  99. if len(params) > 0 {
  100. req.Parameters = params
  101. }
  102. if len(messages) > 0 {
  103. req.Messages = messages
  104. }
  105. if len(licenses) > 0 {
  106. req.License = licenses
  107. }
  108. return req, nil
  109. }
  110. func fileDigestMap(path string) (map[string]string, error) {
  111. fl := make(map[string]string)
  112. fi, err := os.Stat(path)
  113. if err != nil {
  114. return nil, err
  115. }
  116. var files []string
  117. if fi.IsDir() {
  118. files, err = filesForModel(path)
  119. if err != nil {
  120. return nil, err
  121. }
  122. } else {
  123. files = []string{path}
  124. }
  125. for _, f := range files {
  126. digest, err := digestForFile(f)
  127. if err != nil {
  128. return nil, err
  129. }
  130. fl[f] = digest
  131. }
  132. return fl, nil
  133. }
  134. func digestForFile(filename string) (string, error) {
  135. filepath, err := filepath.EvalSymlinks(filename)
  136. if err != nil {
  137. return "", err
  138. }
  139. bin, err := os.Open(filepath)
  140. if err != nil {
  141. return "", err
  142. }
  143. defer bin.Close()
  144. hash := sha256.New()
  145. if _, err := io.Copy(hash, bin); err != nil {
  146. return "", err
  147. }
  148. return fmt.Sprintf("sha256:%x", hash.Sum(nil)), nil
  149. }
  150. func filesForModel(path string) ([]string, error) {
  151. detectContentType := func(path string) (string, error) {
  152. f, err := os.Open(path)
  153. if err != nil {
  154. return "", err
  155. }
  156. defer f.Close()
  157. var b bytes.Buffer
  158. b.Grow(512)
  159. if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
  160. return "", err
  161. }
  162. contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
  163. return contentType, nil
  164. }
  165. glob := func(pattern, contentType string) ([]string, error) {
  166. matches, err := filepath.Glob(pattern)
  167. if err != nil {
  168. return nil, err
  169. }
  170. for _, safetensor := range matches {
  171. if ct, err := detectContentType(safetensor); err != nil {
  172. return nil, err
  173. } else if ct != contentType {
  174. return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
  175. }
  176. }
  177. return matches, nil
  178. }
  179. var files []string
  180. if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
  181. // safetensors files might be unresolved git lfs references; skip if they are
  182. // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
  183. files = append(files, st...)
  184. } else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
  185. // covers adapters.safetensors
  186. files = append(files, st...)
  187. } else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
  188. // covers adapter_model.safetensors
  189. files = append(files, st...)
  190. } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
  191. // pytorch files might also be unresolved git lfs references; skip if they are
  192. // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
  193. files = append(files, pt...)
  194. } else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
  195. // pytorch files might also be unresolved git lfs references; skip if they are
  196. // covers consolidated.x.pth, consolidated.pth
  197. files = append(files, pt...)
  198. } else if gg, _ := glob(filepath.Join(path, "*.gguf"), "application/octet-stream"); len(gg) > 0 {
  199. // covers gguf files ending in .gguf
  200. files = append(files, gg...)
  201. } else if gg, _ := glob(filepath.Join(path, "*.bin"), "application/octet-stream"); len(gg) > 0 {
  202. // covers gguf files ending in .bin
  203. files = append(files, gg...)
  204. } else {
  205. return nil, ErrModelNotFound
  206. }
  207. // add configuration files, json files are detected as text/plain
  208. js, err := glob(filepath.Join(path, "*.json"), "text/plain")
  209. if err != nil {
  210. return nil, err
  211. }
  212. files = append(files, js...)
  213. // bert models require a nested config.json
  214. // TODO(mxyng): merge this with the glob above
  215. js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
  216. if err != nil {
  217. return nil, err
  218. }
  219. files = append(files, js...)
  220. if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
  221. // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
  222. // tokenizer.model might be a unresolved git lfs reference; error if it is
  223. files = append(files, tks...)
  224. } else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
  225. // some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
  226. files = append(files, tks...)
  227. }
  228. return files, nil
  229. }
// Command is a single Modelfile instruction.
type Command struct {
	// Name identifies the command. ParseFile stores "model" for FROM, the
	// parameter name for PARAMETER, and the lowercased keyword otherwise.
	Name string
	// Args is the command's argument with surrounding quotes removed. For
	// "message" commands it has the form "<role>: <content>".
	Args string
}
  234. func (c Command) String() string {
  235. var sb strings.Builder
  236. switch c.Name {
  237. case "model":
  238. fmt.Fprintf(&sb, "FROM %s", c.Args)
  239. case "license", "template", "system", "adapter":
  240. fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
  241. case "message":
  242. role, message, _ := strings.Cut(c.Args, ": ")
  243. fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
  244. default:
  245. fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
  246. }
  247. return sb.String()
  248. }
// state identifies what the Modelfile parser is currently reading.
type state int

const (
	// stateNil is the state between commands; spaces and newlines are skipped.
	stateNil state = iota
	// stateName is the state while reading a command keyword.
	stateName
	// stateValue is the state while reading a command's argument.
	stateValue
	// stateParameter is the state while reading the name that follows PARAMETER.
	stateParameter
	// stateMessage is the state while reading the role that follows MESSAGE.
	stateMessage
	// stateComment is the state from a '#' up to the next newline.
	stateComment
)
var (
	// errMissingFrom is returned by ParseFile when the input contains no FROM
	// command.
	errMissingFrom = errors.New("no FROM line")
	// errInvalidCommand reports a command keyword that is not one of the
	// supported commands.
	errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
)
  262. type ParserError struct {
  263. LineNumber int
  264. Msg string
  265. }
  266. func (e *ParserError) Error() string {
  267. if e.LineNumber > 0 {
  268. return fmt.Sprintf("(line %d): %s", e.LineNumber, e.Msg)
  269. }
  270. return e.Msg
  271. }
  272. func ParseFile(r io.Reader) (*Modelfile, error) {
  273. var cmd Command
  274. var curr state
  275. var currLine int = 1
  276. var b bytes.Buffer
  277. var role string
  278. var f Modelfile
  279. tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
  280. br := bufio.NewReader(transform.NewReader(r, tr))
  281. for {
  282. r, _, err := br.ReadRune()
  283. if errors.Is(err, io.EOF) {
  284. break
  285. } else if err != nil {
  286. return nil, err
  287. }
  288. if isNewline(r) {
  289. currLine++
  290. }
  291. next, r, err := parseRuneForState(r, curr)
  292. if errors.Is(err, io.ErrUnexpectedEOF) {
  293. return nil, fmt.Errorf("%w: %s", err, b.String())
  294. } else if err != nil {
  295. return nil, &ParserError{
  296. LineNumber: currLine,
  297. Msg: err.Error(),
  298. }
  299. }
  300. // process the state transition, some transitions need to be intercepted and redirected
  301. if next != curr {
  302. switch curr {
  303. case stateName:
  304. if !isValidCommand(b.String()) {
  305. return nil, &ParserError{
  306. LineNumber: currLine,
  307. Msg: errInvalidCommand.Error(),
  308. }
  309. }
  310. // next state sometimes depends on the current buffer value
  311. switch s := strings.ToLower(b.String()); s {
  312. case "from":
  313. cmd.Name = "model"
  314. case "parameter":
  315. // transition to stateParameter which sets command name
  316. next = stateParameter
  317. case "message":
  318. // transition to stateMessage which validates the message role
  319. next = stateMessage
  320. fallthrough
  321. default:
  322. cmd.Name = s
  323. }
  324. case stateParameter:
  325. cmd.Name = b.String()
  326. case stateMessage:
  327. role = b.String()
  328. if !isKnownMessageRole(b.String()) {
  329. slog.Warn("received non-standard role", "role", role)
  330. }
  331. case stateComment, stateNil:
  332. // pass
  333. case stateValue:
  334. s, ok := unquote(strings.TrimSpace(b.String()))
  335. if !ok || isSpace(r) {
  336. if _, err := b.WriteRune(r); err != nil {
  337. return nil, err
  338. }
  339. continue
  340. }
  341. if role != "" {
  342. s = role + ": " + s
  343. role = ""
  344. }
  345. cmd.Args = s
  346. f.Commands = append(f.Commands, cmd)
  347. }
  348. b.Reset()
  349. curr = next
  350. }
  351. if strconv.IsPrint(r) {
  352. if _, err := b.WriteRune(r); err != nil {
  353. return nil, err
  354. }
  355. }
  356. }
  357. // flush the buffer
  358. switch curr {
  359. case stateComment, stateNil:
  360. // pass; nothing to flush
  361. case stateValue:
  362. s, ok := unquote(strings.TrimSpace(b.String()))
  363. if !ok {
  364. return nil, io.ErrUnexpectedEOF
  365. }
  366. if role != "" {
  367. s = role + ": " + s
  368. }
  369. cmd.Args = s
  370. f.Commands = append(f.Commands, cmd)
  371. default:
  372. return nil, io.ErrUnexpectedEOF
  373. }
  374. for _, cmd := range f.Commands {
  375. if cmd.Name == "model" {
  376. return &f, nil
  377. }
  378. }
  379. return nil, errMissingFrom
  380. }
  381. func parseRuneForState(r rune, cs state) (state, rune, error) {
  382. switch cs {
  383. case stateNil:
  384. switch {
  385. case r == '#':
  386. return stateComment, 0, nil
  387. case isSpace(r), isNewline(r):
  388. return stateNil, 0, nil
  389. default:
  390. return stateName, r, nil
  391. }
  392. case stateName:
  393. switch {
  394. case isAlpha(r):
  395. return stateName, r, nil
  396. case isSpace(r):
  397. return stateValue, 0, nil
  398. default:
  399. return stateNil, 0, errInvalidCommand
  400. }
  401. case stateValue:
  402. switch {
  403. case isNewline(r):
  404. return stateNil, r, nil
  405. case isSpace(r):
  406. return stateNil, r, nil
  407. default:
  408. return stateValue, r, nil
  409. }
  410. case stateParameter:
  411. switch {
  412. case isAlpha(r), isNumber(r), r == '_':
  413. return stateParameter, r, nil
  414. case isSpace(r):
  415. return stateValue, 0, nil
  416. default:
  417. return stateNil, 0, io.ErrUnexpectedEOF
  418. }
  419. case stateMessage:
  420. switch {
  421. case isAlpha(r):
  422. return stateMessage, r, nil
  423. case isSpace(r):
  424. return stateValue, 0, nil
  425. default:
  426. return stateNil, 0, io.ErrUnexpectedEOF
  427. }
  428. case stateComment:
  429. switch {
  430. case isNewline(r):
  431. return stateNil, 0, nil
  432. default:
  433. return stateComment, 0, nil
  434. }
  435. default:
  436. return stateNil, 0, errors.New("")
  437. }
  438. }
  439. func quote(s string) string {
  440. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  441. if strings.Contains(s, "\"") {
  442. return `"""` + s + `"""`
  443. }
  444. return `"` + s + `"`
  445. }
  446. return s
  447. }
  448. func unquote(s string) (string, bool) {
  449. // TODO: single quotes
  450. if len(s) >= 3 && s[:3] == `"""` {
  451. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  452. return s[3 : len(s)-3], true
  453. }
  454. return "", false
  455. }
  456. if len(s) >= 1 && s[0] == '"' {
  457. if len(s) >= 2 && s[len(s)-1] == '"' {
  458. return s[1 : len(s)-1], true
  459. }
  460. return "", false
  461. }
  462. return s, true
  463. }
  464. func isAlpha(r rune) bool {
  465. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  466. }
  467. func isNumber(r rune) bool {
  468. return r >= '0' && r <= '9'
  469. }
  470. func isSpace(r rune) bool {
  471. return r == ' ' || r == '\t'
  472. }
  473. func isNewline(r rune) bool {
  474. return r == '\r' || r == '\n'
  475. }
  476. func isKnownMessageRole(role string) bool {
  477. return role == "system" || role == "user" || role == "assistant"
  478. }
  479. func isValidCommand(cmd string) bool {
  480. switch strings.ToLower(cmd) {
  481. case "from", "license", "template", "system", "adapter", "parameter", "message":
  482. return true
  483. default:
  484. return false
  485. }
  486. }
  487. func expandPathImpl(path, relativeDir string, currentUserFunc func() (*user.User, error), lookupUserFunc func(string) (*user.User, error)) (string, error) {
  488. if filepath.IsAbs(path) || strings.HasPrefix(path, "\\") || strings.HasPrefix(path, "/") {
  489. return filepath.Abs(path)
  490. } else if strings.HasPrefix(path, "~") {
  491. var homeDir string
  492. if path == "~" || strings.HasPrefix(path, "~/") {
  493. // Current user's home directory
  494. currentUser, err := currentUserFunc()
  495. if err != nil {
  496. return "", fmt.Errorf("failed to get current user: %w", err)
  497. }
  498. homeDir = currentUser.HomeDir
  499. path = strings.TrimPrefix(path, "~")
  500. } else {
  501. // Specific user's home directory
  502. parts := strings.SplitN(path[1:], "/", 2)
  503. userInfo, err := lookupUserFunc(parts[0])
  504. if err != nil {
  505. return "", fmt.Errorf("failed to find user '%s': %w", parts[0], err)
  506. }
  507. homeDir = userInfo.HomeDir
  508. if len(parts) > 1 {
  509. path = "/" + parts[1]
  510. } else {
  511. path = ""
  512. }
  513. }
  514. path = filepath.Join(homeDir, path)
  515. } else {
  516. path = filepath.Join(relativeDir, path)
  517. }
  518. return filepath.Abs(path)
  519. }
// expandPath resolves path to an absolute path by calling expandPathImpl with
// the real os/user lookups (user.Current and user.Lookup). relativeDir is the
// base directory passed through for relative paths.
func expandPath(path, relativeDir string) (string, error) {
	return expandPathImpl(path, relativeDir, user.Current, user.Lookup)
}