parser.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605
  1. package parser
  2. import (
  3. "bufio"
  4. "bytes"
  5. "crypto/sha256"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "net/http"
  10. "os"
  11. "os/user"
  12. "path/filepath"
  13. "slices"
  14. "strconv"
  15. "strings"
  16. "golang.org/x/text/encoding/unicode"
  17. "golang.org/x/text/transform"
  18. "github.com/ollama/ollama/api"
  19. )
// ErrModelNotFound is returned by filesForModel when a directory contains none
// of the model weight files it knows how to look for.
var ErrModelNotFound = errors.New("no Modelfile or safetensors files found")
  21. type Modelfile struct {
  22. Commands []Command
  23. }
  24. func (f Modelfile) String() string {
  25. var sb strings.Builder
  26. for _, cmd := range f.Commands {
  27. fmt.Fprintln(&sb, cmd.String())
  28. }
  29. return sb.String()
  30. }
// deprecatedParameters lists parameter names that CreateRequest reports with a
// warning and then skips instead of adding them to the request parameters.
var deprecatedParameters = []string{"penalize_newline"}
  32. // CreateRequest creates a new *api.CreateRequest from an existing Modelfile
  33. func (f Modelfile) CreateRequest(relativeDir string) (*api.CreateRequest, error) {
  34. req := &api.CreateRequest{}
  35. var messages []api.Message
  36. var licenses []string
  37. params := make(map[string]any)
  38. for _, c := range f.Commands {
  39. switch c.Name {
  40. case "model":
  41. path, err := expandPath(c.Args, relativeDir)
  42. if err != nil {
  43. return nil, err
  44. }
  45. digestMap, err := fileDigestMap(path)
  46. if errors.Is(err, os.ErrNotExist) {
  47. req.From = c.Args
  48. continue
  49. } else if err != nil {
  50. return nil, err
  51. }
  52. req.Files = digestMap
  53. case "adapter":
  54. path, err := expandPath(c.Args, relativeDir)
  55. if err != nil {
  56. return nil, err
  57. }
  58. digestMap, err := fileDigestMap(path)
  59. if err != nil {
  60. return nil, err
  61. }
  62. req.Adapters = digestMap
  63. case "template":
  64. req.Template = c.Args
  65. case "system":
  66. req.System = c.Args
  67. case "license":
  68. licenses = append(licenses, c.Args)
  69. case "message":
  70. role, msg, _ := strings.Cut(c.Args, ": ")
  71. messages = append(messages, api.Message{Role: role, Content: msg})
  72. default:
  73. if slices.Contains(deprecatedParameters, c.Name) {
  74. fmt.Printf("warning: parameter %s is deprecated\n", c.Name)
  75. break
  76. }
  77. ps, err := api.FormatParams(map[string][]string{c.Name: {c.Args}})
  78. if err != nil {
  79. return nil, err
  80. }
  81. for k, v := range ps {
  82. if ks, ok := params[k].([]string); ok {
  83. params[k] = append(ks, v.([]string)...)
  84. } else if vs, ok := v.([]string); ok {
  85. params[k] = vs
  86. } else {
  87. params[k] = v
  88. }
  89. }
  90. }
  91. }
  92. if len(params) > 0 {
  93. req.Parameters = params
  94. }
  95. if len(messages) > 0 {
  96. req.Messages = messages
  97. }
  98. if len(licenses) > 0 {
  99. req.License = licenses
  100. }
  101. return req, nil
  102. }
  103. func fileDigestMap(path string) (map[string]string, error) {
  104. fl := make(map[string]string)
  105. fi, err := os.Stat(path)
  106. if err != nil {
  107. return nil, err
  108. }
  109. var files []string
  110. if fi.IsDir() {
  111. files, err = filesForModel(path)
  112. if err != nil {
  113. return nil, err
  114. }
  115. } else {
  116. files = []string{path}
  117. }
  118. for _, f := range files {
  119. digest, err := digestForFile(f)
  120. if err != nil {
  121. return nil, err
  122. }
  123. fl[f] = digest
  124. }
  125. return fl, nil
  126. }
  127. func digestForFile(filename string) (string, error) {
  128. filepath, err := filepath.EvalSymlinks(filename)
  129. if err != nil {
  130. return "", err
  131. }
  132. bin, err := os.Open(filepath)
  133. if err != nil {
  134. return "", err
  135. }
  136. defer bin.Close()
  137. hash := sha256.New()
  138. if _, err := io.Copy(hash, bin); err != nil {
  139. return "", err
  140. }
  141. return fmt.Sprintf("sha256:%x", hash.Sum(nil)), nil
  142. }
  143. func filesForModel(path string) ([]string, error) {
  144. detectContentType := func(path string) (string, error) {
  145. f, err := os.Open(path)
  146. if err != nil {
  147. return "", err
  148. }
  149. defer f.Close()
  150. var b bytes.Buffer
  151. b.Grow(512)
  152. if _, err := io.CopyN(&b, f, 512); err != nil && !errors.Is(err, io.EOF) {
  153. return "", err
  154. }
  155. contentType, _, _ := strings.Cut(http.DetectContentType(b.Bytes()), ";")
  156. return contentType, nil
  157. }
  158. glob := func(pattern, contentType string) ([]string, error) {
  159. matches, err := filepath.Glob(pattern)
  160. if err != nil {
  161. return nil, err
  162. }
  163. for _, safetensor := range matches {
  164. if ct, err := detectContentType(safetensor); err != nil {
  165. return nil, err
  166. } else if ct != contentType {
  167. return nil, fmt.Errorf("invalid content type: expected %s for %s", ct, safetensor)
  168. }
  169. }
  170. return matches, nil
  171. }
  172. var files []string
  173. if st, _ := glob(filepath.Join(path, "model*.safetensors"), "application/octet-stream"); len(st) > 0 {
  174. // safetensors files might be unresolved git lfs references; skip if they are
  175. // covers model-x-of-y.safetensors, model.fp32-x-of-y.safetensors, model.safetensors
  176. files = append(files, st...)
  177. } else if st, _ := glob(filepath.Join(path, "adapters.safetensors"), "application/octet-stream"); len(st) > 0 {
  178. // covers adapters.safetensors
  179. files = append(files, st...)
  180. } else if st, _ := glob(filepath.Join(path, "adapter_model.safetensors"), "application/octet-stream"); len(st) > 0 {
  181. // covers adapter_model.safetensors
  182. files = append(files, st...)
  183. } else if pt, _ := glob(filepath.Join(path, "pytorch_model*.bin"), "application/zip"); len(pt) > 0 {
  184. // pytorch files might also be unresolved git lfs references; skip if they are
  185. // covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
  186. files = append(files, pt...)
  187. } else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
  188. // pytorch files might also be unresolved git lfs references; skip if they are
  189. // covers consolidated.x.pth, consolidated.pth
  190. files = append(files, pt...)
  191. } else if gg, _ := glob(filepath.Join(path, "*.gguf"), "application/octet-stream"); len(gg) > 0 {
  192. // covers gguf files ending in .gguf
  193. files = append(files, gg...)
  194. } else if gg, _ := glob(filepath.Join(path, "*.bin"), "application/octet-stream"); len(gg) > 0 {
  195. // covers gguf files ending in .bin
  196. files = append(files, gg...)
  197. } else {
  198. return nil, ErrModelNotFound
  199. }
  200. // add configuration files, json files are detected as text/plain
  201. js, err := glob(filepath.Join(path, "*.json"), "text/plain")
  202. if err != nil {
  203. return nil, err
  204. }
  205. files = append(files, js...)
  206. // bert models require a nested config.json
  207. // TODO(mxyng): merge this with the glob above
  208. js, err = glob(filepath.Join(path, "**/*.json"), "text/plain")
  209. if err != nil {
  210. return nil, err
  211. }
  212. files = append(files, js...)
  213. if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
  214. // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
  215. // tokenizer.model might be a unresolved git lfs reference; error if it is
  216. files = append(files, tks...)
  217. } else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
  218. // some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
  219. files = append(files, tks...)
  220. }
  221. return files, nil
  222. }
  223. type Command struct {
  224. Name string
  225. Args string
  226. }
  227. func (c Command) String() string {
  228. var sb strings.Builder
  229. switch c.Name {
  230. case "model":
  231. fmt.Fprintf(&sb, "FROM %s", c.Args)
  232. case "license", "template", "system", "adapter":
  233. fmt.Fprintf(&sb, "%s %s", strings.ToUpper(c.Name), quote(c.Args))
  234. case "message":
  235. role, message, _ := strings.Cut(c.Args, ": ")
  236. fmt.Fprintf(&sb, "MESSAGE %s %s", role, quote(message))
  237. default:
  238. fmt.Fprintf(&sb, "PARAMETER %s %s", c.Name, quote(c.Args))
  239. }
  240. return sb.String()
  241. }
// state identifies what the parser is currently reading; transitions between
// states are decided rune by rune in parseRuneForState.
type state int

const (
	// stateNil is the initial state and the state between commands.
	stateNil state = iota
	// stateName is active while a command keyword is being read.
	stateName
	// stateValue is active while a command's argument is being read.
	stateValue
	// stateParameter is active while the parameter name following the
	// PARAMETER keyword is being read.
	stateParameter
	// stateMessage is active while the role following the MESSAGE keyword is
	// being read.
	stateMessage
	// stateComment is active from a '#' seen in stateNil until the next newline.
	stateComment
)
var (
	// errMissingFrom is returned by ParseFile when the input contains no FROM command.
	errMissingFrom = errors.New("no FROM line")
	// errInvalidMessageRole is reported when the role of a MESSAGE command is
	// rejected by isValidMessageRole.
	errInvalidMessageRole = errors.New("message role must be one of \"system\", \"user\", or \"assistant\"")
	// errInvalidCommand is reported when a command keyword is rejected by
	// isValidCommand or is followed by an unexpected character.
	errInvalidCommand = errors.New("command must be one of \"from\", \"license\", \"template\", \"system\", \"adapter\", \"parameter\", or \"message\"")
)
  256. type ParserError struct {
  257. LineNumber int
  258. Msg string
  259. }
  260. func (e *ParserError) Error() string {
  261. if e.LineNumber > 0 {
  262. return fmt.Sprintf("(line %d): %s", e.LineNumber, e.Msg)
  263. }
  264. return e.Msg
  265. }
  266. func ParseFile(r io.Reader) (*Modelfile, error) {
  267. var cmd Command
  268. var curr state
  269. var currLine int = 1
  270. var b bytes.Buffer
  271. var role string
  272. var f Modelfile
  273. tr := unicode.BOMOverride(unicode.UTF8.NewDecoder())
  274. br := bufio.NewReader(transform.NewReader(r, tr))
  275. for {
  276. r, _, err := br.ReadRune()
  277. if errors.Is(err, io.EOF) {
  278. break
  279. } else if err != nil {
  280. return nil, err
  281. }
  282. if isNewline(r) {
  283. currLine++
  284. }
  285. next, r, err := parseRuneForState(r, curr)
  286. if errors.Is(err, io.ErrUnexpectedEOF) {
  287. return nil, fmt.Errorf("%w: %s", err, b.String())
  288. } else if err != nil {
  289. return nil, &ParserError{
  290. LineNumber: currLine,
  291. Msg: err.Error(),
  292. }
  293. }
  294. // process the state transition, some transitions need to be intercepted and redirected
  295. if next != curr {
  296. switch curr {
  297. case stateName:
  298. if !isValidCommand(b.String()) {
  299. return nil, &ParserError{
  300. LineNumber: currLine,
  301. Msg: errInvalidCommand.Error(),
  302. }
  303. }
  304. // next state sometimes depends on the current buffer value
  305. switch s := strings.ToLower(b.String()); s {
  306. case "from":
  307. cmd.Name = "model"
  308. case "parameter":
  309. // transition to stateParameter which sets command name
  310. next = stateParameter
  311. case "message":
  312. // transition to stateMessage which validates the message role
  313. next = stateMessage
  314. fallthrough
  315. default:
  316. cmd.Name = s
  317. }
  318. case stateParameter:
  319. cmd.Name = b.String()
  320. case stateMessage:
  321. if !isValidMessageRole(b.String()) {
  322. return nil, &ParserError{
  323. LineNumber: currLine,
  324. Msg: errInvalidMessageRole.Error(),
  325. }
  326. }
  327. role = b.String()
  328. case stateComment, stateNil:
  329. // pass
  330. case stateValue:
  331. s, ok := unquote(strings.TrimSpace(b.String()))
  332. if !ok || isSpace(r) {
  333. if _, err := b.WriteRune(r); err != nil {
  334. return nil, err
  335. }
  336. continue
  337. }
  338. if role != "" {
  339. s = role + ": " + s
  340. role = ""
  341. }
  342. cmd.Args = s
  343. f.Commands = append(f.Commands, cmd)
  344. }
  345. b.Reset()
  346. curr = next
  347. }
  348. if strconv.IsPrint(r) {
  349. if _, err := b.WriteRune(r); err != nil {
  350. return nil, err
  351. }
  352. }
  353. }
  354. // flush the buffer
  355. switch curr {
  356. case stateComment, stateNil:
  357. // pass; nothing to flush
  358. case stateValue:
  359. s, ok := unquote(strings.TrimSpace(b.String()))
  360. if !ok {
  361. return nil, io.ErrUnexpectedEOF
  362. }
  363. if role != "" {
  364. s = role + ": " + s
  365. }
  366. cmd.Args = s
  367. f.Commands = append(f.Commands, cmd)
  368. default:
  369. return nil, io.ErrUnexpectedEOF
  370. }
  371. for _, cmd := range f.Commands {
  372. if cmd.Name == "model" {
  373. return &f, nil
  374. }
  375. }
  376. return nil, errMissingFrom
  377. }
  378. func parseRuneForState(r rune, cs state) (state, rune, error) {
  379. switch cs {
  380. case stateNil:
  381. switch {
  382. case r == '#':
  383. return stateComment, 0, nil
  384. case isSpace(r), isNewline(r):
  385. return stateNil, 0, nil
  386. default:
  387. return stateName, r, nil
  388. }
  389. case stateName:
  390. switch {
  391. case isAlpha(r):
  392. return stateName, r, nil
  393. case isSpace(r):
  394. return stateValue, 0, nil
  395. default:
  396. return stateNil, 0, errInvalidCommand
  397. }
  398. case stateValue:
  399. switch {
  400. case isNewline(r):
  401. return stateNil, r, nil
  402. case isSpace(r):
  403. return stateNil, r, nil
  404. default:
  405. return stateValue, r, nil
  406. }
  407. case stateParameter:
  408. switch {
  409. case isAlpha(r), isNumber(r), r == '_':
  410. return stateParameter, r, nil
  411. case isSpace(r):
  412. return stateValue, 0, nil
  413. default:
  414. return stateNil, 0, io.ErrUnexpectedEOF
  415. }
  416. case stateMessage:
  417. switch {
  418. case isAlpha(r):
  419. return stateMessage, r, nil
  420. case isSpace(r):
  421. return stateValue, 0, nil
  422. default:
  423. return stateNil, 0, io.ErrUnexpectedEOF
  424. }
  425. case stateComment:
  426. switch {
  427. case isNewline(r):
  428. return stateNil, 0, nil
  429. default:
  430. return stateComment, 0, nil
  431. }
  432. default:
  433. return stateNil, 0, errors.New("")
  434. }
  435. }
  436. func quote(s string) string {
  437. if strings.Contains(s, "\n") || strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") {
  438. if strings.Contains(s, "\"") {
  439. return `"""` + s + `"""`
  440. }
  441. return `"` + s + `"`
  442. }
  443. return s
  444. }
  445. func unquote(s string) (string, bool) {
  446. // TODO: single quotes
  447. if len(s) >= 3 && s[:3] == `"""` {
  448. if len(s) >= 6 && s[len(s)-3:] == `"""` {
  449. return s[3 : len(s)-3], true
  450. }
  451. return "", false
  452. }
  453. if len(s) >= 1 && s[0] == '"' {
  454. if len(s) >= 2 && s[len(s)-1] == '"' {
  455. return s[1 : len(s)-1], true
  456. }
  457. return "", false
  458. }
  459. return s, true
  460. }
  461. func isAlpha(r rune) bool {
  462. return r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z'
  463. }
  464. func isNumber(r rune) bool {
  465. return r >= '0' && r <= '9'
  466. }
  467. func isSpace(r rune) bool {
  468. return r == ' ' || r == '\t'
  469. }
  470. func isNewline(r rune) bool {
  471. return r == '\r' || r == '\n'
  472. }
  473. func isValidMessageRole(role string) bool {
  474. return role == "system" || role == "user" || role == "assistant"
  475. }
  476. func isValidCommand(cmd string) bool {
  477. switch strings.ToLower(cmd) {
  478. case "from", "license", "template", "system", "adapter", "parameter", "message":
  479. return true
  480. default:
  481. return false
  482. }
  483. }
  484. func expandPathImpl(path, relativeDir string, currentUserFunc func() (*user.User, error), lookupUserFunc func(string) (*user.User, error)) (string, error) {
  485. if filepath.IsAbs(path) || strings.HasPrefix(path, "\\") || strings.HasPrefix(path, "/") {
  486. return filepath.Abs(path)
  487. } else if strings.HasPrefix(path, "~") {
  488. var homeDir string
  489. if path == "~" || strings.HasPrefix(path, "~/") {
  490. // Current user's home directory
  491. currentUser, err := currentUserFunc()
  492. if err != nil {
  493. return "", fmt.Errorf("failed to get current user: %w", err)
  494. }
  495. homeDir = currentUser.HomeDir
  496. path = strings.TrimPrefix(path, "~")
  497. } else {
  498. // Specific user's home directory
  499. parts := strings.SplitN(path[1:], "/", 2)
  500. userInfo, err := lookupUserFunc(parts[0])
  501. if err != nil {
  502. return "", fmt.Errorf("failed to find user '%s': %w", parts[0], err)
  503. }
  504. homeDir = userInfo.HomeDir
  505. if len(parts) > 1 {
  506. path = "/" + parts[1]
  507. } else {
  508. path = ""
  509. }
  510. }
  511. path = filepath.Join(homeDir, path)
  512. } else {
  513. path = filepath.Join(relativeDir, path)
  514. }
  515. return filepath.Abs(path)
  516. }
// expandPath resolves path to an absolute path by calling expandPathImpl with
// the real user lookups from os/user (user.Current and user.Lookup).
// relativeDir is the base directory handed to expandPathImpl for paths that
// are neither absolute nor start with "~".
func expandPath(path, relativeDir string) (string, error) {
	return expandPathImpl(path, relativeDir, user.Current, user.Lookup)
}