parser_test.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. package parser
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strings"
  9. "testing"
  10. "unicode/utf16"
  11. "github.com/stretchr/testify/assert"
  12. "github.com/stretchr/testify/require"
  13. "golang.org/x/text/encoding"
  14. "golang.org/x/text/encoding/unicode"
  15. )
  16. func TestParseFileFile(t *testing.T) {
  17. input := `
  18. FROM model1
  19. ADAPTER adapter1
  20. LICENSE MIT
  21. PARAMETER param1 value1
  22. PARAMETER param2 value2
  23. TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
  24. {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
  25. {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
  26. {{ .Response }}<|eot_id|>"""
  27. `
  28. reader := strings.NewReader(input)
  29. modelfile, err := ParseFile(reader)
  30. require.NoError(t, err)
  31. expectedCommands := []Command{
  32. {Name: "model", Args: "model1"},
  33. {Name: "adapter", Args: "adapter1"},
  34. {Name: "license", Args: "MIT"},
  35. {Name: "param1", Args: "value1"},
  36. {Name: "param2", Args: "value2"},
  37. {Name: "template", Args: "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>"},
  38. }
  39. assert.Equal(t, expectedCommands, modelfile.Commands)
  40. }
  41. func TestParseFileTrimSpace(t *testing.T) {
  42. input := `
  43. FROM " model 1"
  44. ADAPTER adapter3
  45. LICENSE "MIT "
  46. PARAMETER param1 value1
  47. PARAMETER param2 value2
  48. TEMPLATE """ {{ if .System }}<|start_header_id|>system<|end_header_id|>
  49. {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
  50. {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
  51. {{ .Response }}<|eot_id|> """
  52. `
  53. reader := strings.NewReader(input)
  54. modelfile, err := ParseFile(reader)
  55. require.NoError(t, err)
  56. expectedCommands := []Command{
  57. {Name: "model", Args: " model 1"},
  58. {Name: "adapter", Args: "adapter3"},
  59. {Name: "license", Args: "MIT "},
  60. {Name: "param1", Args: "value1"},
  61. {Name: "param2", Args: "value2"},
  62. {Name: "template", Args: " {{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|> "},
  63. }
  64. assert.Equal(t, expectedCommands, modelfile.Commands)
  65. }
  66. func TestParseFileFrom(t *testing.T) {
  67. cases := []struct {
  68. input string
  69. expected []Command
  70. err error
  71. }{
  72. {
  73. "FROM \"FOO BAR \"",
  74. []Command{{Name: "model", Args: "FOO BAR "}},
  75. nil,
  76. },
  77. {
  78. "FROM \"FOO BAR\"\nPARAMETER param1 value1",
  79. []Command{{Name: "model", Args: "FOO BAR"}, {Name: "param1", Args: "value1"}},
  80. nil,
  81. },
  82. {
  83. "FROM FOOO BAR ",
  84. []Command{{Name: "model", Args: "FOOO BAR"}},
  85. nil,
  86. },
  87. {
  88. "FROM /what/is/the path ",
  89. []Command{{Name: "model", Args: "/what/is/the path"}},
  90. nil,
  91. },
  92. {
  93. "FROM foo",
  94. []Command{{Name: "model", Args: "foo"}},
  95. nil,
  96. },
  97. {
  98. "FROM /path/to/model",
  99. []Command{{Name: "model", Args: "/path/to/model"}},
  100. nil,
  101. },
  102. {
  103. "FROM /path/to/model/fp16.bin",
  104. []Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
  105. nil,
  106. },
  107. {
  108. "FROM llama3:latest",
  109. []Command{{Name: "model", Args: "llama3:latest"}},
  110. nil,
  111. },
  112. {
  113. "FROM llama3:7b-instruct-q4_K_M",
  114. []Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
  115. nil,
  116. },
  117. {
  118. "", nil, errMissingFrom,
  119. },
  120. {
  121. "PARAMETER param1 value1",
  122. nil,
  123. errMissingFrom,
  124. },
  125. {
  126. "PARAMETER param1 value1\nFROM foo",
  127. []Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
  128. nil,
  129. },
  130. {
  131. "PARAMETER what the \nFROM lemons make lemonade ",
  132. []Command{{Name: "what", Args: "the"}, {Name: "model", Args: "lemons make lemonade"}},
  133. nil,
  134. },
  135. }
  136. for _, c := range cases {
  137. t.Run("", func(t *testing.T) {
  138. modelfile, err := ParseFile(strings.NewReader(c.input))
  139. require.ErrorIs(t, err, c.err)
  140. if modelfile != nil {
  141. assert.Equal(t, c.expected, modelfile.Commands)
  142. }
  143. })
  144. }
  145. }
  146. func TestParseFileParametersMissingValue(t *testing.T) {
  147. input := `
  148. FROM foo
  149. PARAMETER param1
  150. `
  151. reader := strings.NewReader(input)
  152. _, err := ParseFile(reader)
  153. require.ErrorIs(t, err, io.ErrUnexpectedEOF)
  154. }
  155. func TestParseFileBadCommand(t *testing.T) {
  156. input := `
  157. FROM foo
  158. BADCOMMAND param1 value1
  159. `
  160. parserError := &ParserError{
  161. LineNumber: 3,
  162. Msg: errInvalidCommand.Error(),
  163. }
  164. _, err := ParseFile(strings.NewReader(input))
  165. if !errors.As(err, &parserError) {
  166. t.Errorf("unexpected error: expected: %s, actual: %s", parserError.Error(), err.Error())
  167. }
  168. }
  169. func TestParseFileMessages(t *testing.T) {
  170. cases := []struct {
  171. input string
  172. expected []Command
  173. err error
  174. }{
  175. {
  176. `
  177. FROM foo
  178. MESSAGE system You are a file parser. Always parse things.
  179. `,
  180. []Command{
  181. {Name: "model", Args: "foo"},
  182. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  183. },
  184. nil,
  185. },
  186. {
  187. `
  188. FROM foo
  189. MESSAGE system You are a file parser. Always parse things.`,
  190. []Command{
  191. {Name: "model", Args: "foo"},
  192. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  193. },
  194. nil,
  195. },
  196. {
  197. `
  198. FROM foo
  199. MESSAGE system You are a file parser. Always parse things.
  200. MESSAGE user Hey there!
  201. MESSAGE assistant Hello, I want to parse all the things!
  202. `,
  203. []Command{
  204. {Name: "model", Args: "foo"},
  205. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  206. {Name: "message", Args: "user: Hey there!"},
  207. {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
  208. },
  209. nil,
  210. },
  211. {
  212. `
  213. FROM foo
  214. MESSAGE system """
  215. You are a multiline file parser. Always parse things.
  216. """
  217. `,
  218. []Command{
  219. {Name: "model", Args: "foo"},
  220. {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
  221. },
  222. nil,
  223. },
  224. {
  225. `
  226. FROM foo
  227. MESSAGE badguy I'm a bad guy!
  228. `,
  229. nil,
  230. &ParserError{
  231. LineNumber: 3,
  232. Msg: errInvalidMessageRole.Error(),
  233. },
  234. },
  235. {
  236. `
  237. FROM foo
  238. MESSAGE system
  239. `,
  240. nil,
  241. io.ErrUnexpectedEOF,
  242. },
  243. {
  244. `
  245. FROM foo
  246. MESSAGE system`,
  247. nil,
  248. io.ErrUnexpectedEOF,
  249. },
  250. }
  251. for _, tt := range cases {
  252. t.Run("", func(t *testing.T) {
  253. modelfile, err := ParseFile(strings.NewReader(tt.input))
  254. if modelfile != nil {
  255. assert.Equal(t, tt.expected, modelfile.Commands)
  256. }
  257. if tt.err == nil {
  258. if err != nil {
  259. t.Fatalf("expected no error, but got %v", err)
  260. }
  261. return
  262. }
  263. switch tt.err.(type) {
  264. case *ParserError:
  265. var pErr *ParserError
  266. if errors.As(err, &pErr) {
  267. // got the correct type of error
  268. return
  269. }
  270. }
  271. if errors.Is(err, tt.err) {
  272. return
  273. }
  274. t.Fatalf("unexpected error: expected: %v, actual: %v", tt.err, err)
  275. })
  276. }
  277. }
  278. func TestParseFileQuoted(t *testing.T) {
  279. cases := []struct {
  280. multiline string
  281. expected []Command
  282. err error
  283. }{
  284. {
  285. `
  286. FROM foo
  287. SYSTEM """
  288. This is a
  289. multiline system.
  290. """
  291. `,
  292. []Command{
  293. {Name: "model", Args: "foo"},
  294. {Name: "system", Args: "\nThis is a\nmultiline system.\n"},
  295. },
  296. nil,
  297. },
  298. {
  299. `
  300. FROM foo
  301. SYSTEM """
  302. This is a
  303. multiline system."""
  304. `,
  305. []Command{
  306. {Name: "model", Args: "foo"},
  307. {Name: "system", Args: "\nThis is a\nmultiline system."},
  308. },
  309. nil,
  310. },
  311. {
  312. `
  313. FROM foo
  314. SYSTEM """This is a
  315. multiline system."""
  316. `,
  317. []Command{
  318. {Name: "model", Args: "foo"},
  319. {Name: "system", Args: "This is a\nmultiline system."},
  320. },
  321. nil,
  322. },
  323. {
  324. `
  325. FROM foo
  326. SYSTEM """This is a multiline system."""
  327. `,
  328. []Command{
  329. {Name: "model", Args: "foo"},
  330. {Name: "system", Args: "This is a multiline system."},
  331. },
  332. nil,
  333. },
  334. {
  335. `
  336. FROM foo
  337. SYSTEM """This is a multiline system.""
  338. `,
  339. nil,
  340. io.ErrUnexpectedEOF,
  341. },
  342. {
  343. `
  344. FROM foo
  345. SYSTEM "
  346. `,
  347. nil,
  348. io.ErrUnexpectedEOF,
  349. },
  350. {
  351. `
  352. FROM foo
  353. SYSTEM """
  354. This is a multiline system with "quotes".
  355. """
  356. `,
  357. []Command{
  358. {Name: "model", Args: "foo"},
  359. {Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
  360. },
  361. nil,
  362. },
  363. {
  364. `
  365. FROM foo
  366. SYSTEM """"""
  367. `,
  368. []Command{
  369. {Name: "model", Args: "foo"},
  370. {Name: "system", Args: ""},
  371. },
  372. nil,
  373. },
  374. {
  375. `
  376. FROM foo
  377. SYSTEM ""
  378. `,
  379. []Command{
  380. {Name: "model", Args: "foo"},
  381. {Name: "system", Args: ""},
  382. },
  383. nil,
  384. },
  385. {
  386. `
  387. FROM foo
  388. SYSTEM "'"
  389. `,
  390. []Command{
  391. {Name: "model", Args: "foo"},
  392. {Name: "system", Args: "'"},
  393. },
  394. nil,
  395. },
  396. {
  397. `
  398. FROM foo
  399. SYSTEM """''"'""'""'"'''''""'""'"""
  400. `,
  401. []Command{
  402. {Name: "model", Args: "foo"},
  403. {Name: "system", Args: `''"'""'""'"'''''""'""'`},
  404. },
  405. nil,
  406. },
  407. {
  408. `
  409. FROM foo
  410. TEMPLATE """
  411. {{ .Prompt }}
  412. """`,
  413. []Command{
  414. {Name: "model", Args: "foo"},
  415. {Name: "template", Args: "\n{{ .Prompt }}\n"},
  416. },
  417. nil,
  418. },
  419. }
  420. for _, c := range cases {
  421. t.Run("", func(t *testing.T) {
  422. modelfile, err := ParseFile(strings.NewReader(c.multiline))
  423. require.ErrorIs(t, err, c.err)
  424. if modelfile != nil {
  425. assert.Equal(t, c.expected, modelfile.Commands)
  426. }
  427. })
  428. }
  429. }
  430. func TestParseFileParameters(t *testing.T) {
  431. cases := map[string]struct {
  432. name, value string
  433. }{
  434. "numa true": {"numa", "true"},
  435. "num_ctx 1": {"num_ctx", "1"},
  436. "num_batch 1": {"num_batch", "1"},
  437. "num_gqa 1": {"num_gqa", "1"},
  438. "num_gpu 1": {"num_gpu", "1"},
  439. "main_gpu 1": {"main_gpu", "1"},
  440. "low_vram true": {"low_vram", "true"},
  441. "logits_all true": {"logits_all", "true"},
  442. "vocab_only true": {"vocab_only", "true"},
  443. "use_mmap true": {"use_mmap", "true"},
  444. "use_mlock true": {"use_mlock", "true"},
  445. "num_thread 1": {"num_thread", "1"},
  446. "num_keep 1": {"num_keep", "1"},
  447. "seed 1": {"seed", "1"},
  448. "num_predict 1": {"num_predict", "1"},
  449. "top_k 1": {"top_k", "1"},
  450. "top_p 1.0": {"top_p", "1.0"},
  451. "min_p 0.05": {"min_p", "0.05"},
  452. "tfs_z 1.0": {"tfs_z", "1.0"},
  453. "typical_p 1.0": {"typical_p", "1.0"},
  454. "repeat_last_n 1": {"repeat_last_n", "1"},
  455. "temperature 1.0": {"temperature", "1.0"},
  456. "repeat_penalty 1.0": {"repeat_penalty", "1.0"},
  457. "presence_penalty 1.0": {"presence_penalty", "1.0"},
  458. "frequency_penalty 1.0": {"frequency_penalty", "1.0"},
  459. "mirostat 1": {"mirostat", "1"},
  460. "mirostat_tau 1.0": {"mirostat_tau", "1.0"},
  461. "mirostat_eta 1.0": {"mirostat_eta", "1.0"},
  462. "penalize_newline true": {"penalize_newline", "true"},
  463. "stop ### User:": {"stop", "### User:"},
  464. "stop ### User: ": {"stop", "### User:"},
  465. "stop \"### User:\"": {"stop", "### User:"},
  466. "stop \"### User: \"": {"stop", "### User: "},
  467. "stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
  468. "stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
  469. "stop <|endoftext|>": {"stop", "<|endoftext|>"},
  470. "stop <|eot_id|>": {"stop", "<|eot_id|>"},
  471. "stop </s>": {"stop", "</s>"},
  472. }
  473. for k, v := range cases {
  474. t.Run(k, func(t *testing.T) {
  475. var b bytes.Buffer
  476. fmt.Fprintln(&b, "FROM foo")
  477. fmt.Fprintln(&b, "PARAMETER", k)
  478. modelfile, err := ParseFile(&b)
  479. require.NoError(t, err)
  480. assert.Equal(t, []Command{
  481. {Name: "model", Args: "foo"},
  482. {Name: v.name, Args: v.value},
  483. }, modelfile.Commands)
  484. })
  485. }
  486. }
  487. func TestParseFileComments(t *testing.T) {
  488. cases := []struct {
  489. input string
  490. expected []Command
  491. }{
  492. {
  493. `
  494. # comment
  495. FROM foo
  496. `,
  497. []Command{
  498. {Name: "model", Args: "foo"},
  499. },
  500. },
  501. }
  502. for _, c := range cases {
  503. t.Run("", func(t *testing.T) {
  504. modelfile, err := ParseFile(strings.NewReader(c.input))
  505. require.NoError(t, err)
  506. assert.Equal(t, c.expected, modelfile.Commands)
  507. })
  508. }
  509. }
  510. func TestParseFileFormatParseFile(t *testing.T) {
  511. cases := []string{
  512. `
  513. FROM foo
  514. ADAPTER adapter1
  515. LICENSE MIT
  516. PARAMETER param1 value1
  517. PARAMETER param2 value2
  518. TEMPLATE template1
  519. MESSAGE system You are a file parser. Always parse things.
  520. MESSAGE user Hey there!
  521. MESSAGE assistant Hello, I want to parse all the things!
  522. `,
  523. `
  524. FROM foo
  525. ADAPTER adapter1
  526. LICENSE MIT
  527. PARAMETER param1 value1
  528. PARAMETER param2 value2
  529. TEMPLATE template1
  530. MESSAGE system """
  531. You are a store greeter. Always respond with "Hello!".
  532. """
  533. MESSAGE user Hey there!
  534. MESSAGE assistant Hello, I want to parse all the things!
  535. `,
  536. `
  537. FROM foo
  538. ADAPTER adapter1
  539. LICENSE """
  540. Very long and boring legal text.
  541. Blah blah blah.
  542. "Oh look, a quote!"
  543. """
  544. PARAMETER param1 value1
  545. PARAMETER param2 value2
  546. TEMPLATE template1
  547. MESSAGE system """
  548. You are a store greeter. Always respond with "Hello!".
  549. """
  550. MESSAGE user Hey there!
  551. MESSAGE assistant Hello, I want to parse all the things!
  552. `,
  553. `
  554. FROM foo
  555. SYSTEM ""
  556. `,
  557. }
  558. for _, c := range cases {
  559. t.Run("", func(t *testing.T) {
  560. modelfile, err := ParseFile(strings.NewReader(c))
  561. require.NoError(t, err)
  562. modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
  563. require.NoError(t, err)
  564. assert.Equal(t, modelfile, modelfile2)
  565. })
  566. }
  567. }
  568. func TestParseFileUTF16ParseFile(t *testing.T) {
  569. data := `FROM bob
  570. PARAMETER param1 1
  571. PARAMETER param2 4096
  572. SYSTEM You are a utf16 file.
  573. `
  574. expected := []Command{
  575. {Name: "model", Args: "bob"},
  576. {Name: "param1", Args: "1"},
  577. {Name: "param2", Args: "4096"},
  578. {Name: "system", Args: "You are a utf16 file."},
  579. }
  580. t.Run("le", func(t *testing.T) {
  581. var b bytes.Buffer
  582. require.NoError(t, binary.Write(&b, binary.LittleEndian, []byte{0xff, 0xfe}))
  583. require.NoError(t, binary.Write(&b, binary.LittleEndian, utf16.Encode([]rune(data))))
  584. actual, err := ParseFile(&b)
  585. require.NoError(t, err)
  586. assert.Equal(t, expected, actual.Commands)
  587. })
  588. t.Run("be", func(t *testing.T) {
  589. var b bytes.Buffer
  590. require.NoError(t, binary.Write(&b, binary.BigEndian, []byte{0xfe, 0xff}))
  591. require.NoError(t, binary.Write(&b, binary.BigEndian, utf16.Encode([]rune(data))))
  592. actual, err := ParseFile(&b)
  593. require.NoError(t, err)
  594. assert.Equal(t, expected, actual.Commands)
  595. })
  596. }
  597. func TestParseMultiByte(t *testing.T) {
  598. input := `FROM test
  599. SYSTEM 你好👋`
  600. expect := []Command{
  601. {Name: "model", Args: "test"},
  602. {Name: "system", Args: "你好👋"},
  603. }
  604. encodings := []encoding.Encoding{
  605. unicode.UTF8,
  606. unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
  607. unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
  608. }
  609. for _, encoding := range encodings {
  610. t.Run(fmt.Sprintf("%s", encoding), func(t *testing.T) {
  611. s, err := encoding.NewEncoder().String(input)
  612. require.NoError(t, err)
  613. actual, err := ParseFile(strings.NewReader(s))
  614. require.NoError(t, err)
  615. assert.Equal(t, expect, actual.Commands)
  616. })
  617. }
  618. }