parser_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. package parser
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "testing"
  9. "unicode/utf16"
  10. "github.com/stretchr/testify/assert"
  11. "github.com/stretchr/testify/require"
  12. "golang.org/x/text/encoding"
  13. "golang.org/x/text/encoding/unicode"
  14. )
  15. func TestParseFileFile(t *testing.T) {
  16. input := `
  17. FROM model1
  18. ADAPTER adapter1
  19. LICENSE MIT
  20. PARAMETER param1 value1
  21. PARAMETER param2 value2
  22. TEMPLATE """{{ if .System }}<|start_header_id|>system<|end_header_id|>
  23. {{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
  24. {{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
  25. {{ .Response }}<|eot_id|>"""
  26. `
  27. reader := strings.NewReader(input)
  28. modelfile, err := ParseFile(reader)
  29. require.NoError(t, err)
  30. expectedCommands := []Command{
  31. {Name: "model", Args: "model1"},
  32. {Name: "adapter", Args: "adapter1"},
  33. {Name: "license", Args: "MIT"},
  34. {Name: "param1", Args: "value1"},
  35. {Name: "param2", Args: "value2"},
  36. {Name: "template", Args: "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>"},
  37. }
  38. assert.Equal(t, expectedCommands, modelfile.Commands)
  39. }
  40. func TestParseFileFrom(t *testing.T) {
  41. var cases = []struct {
  42. input string
  43. expected []Command
  44. err error
  45. }{
  46. {
  47. "FROM \"FOO BAR \"",
  48. []Command{{Name: "model", Args: "FOO BAR "}},
  49. nil,
  50. },
  51. {
  52. "FROM \"FOO BAR\"\nPARAMETER param1 value1",
  53. []Command{{Name: "model", Args: "FOO BAR"}, {Name: "param1", Args: "value1"}},
  54. nil,
  55. },
  56. {
  57. "FROM FOOO BAR ",
  58. []Command{{Name: "model", Args: "FOOO BAR"}},
  59. nil,
  60. },
  61. {
  62. "FROM /what/is/the path ",
  63. []Command{{Name: "model", Args: "/what/is/the path"}},
  64. nil,
  65. },
  66. {
  67. "FROM foo",
  68. []Command{{Name: "model", Args: "foo"}},
  69. nil,
  70. },
  71. {
  72. "FROM /path/to/model",
  73. []Command{{Name: "model", Args: "/path/to/model"}},
  74. nil,
  75. },
  76. {
  77. "FROM /path/to/model/fp16.bin",
  78. []Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
  79. nil,
  80. },
  81. {
  82. "FROM llama3:latest",
  83. []Command{{Name: "model", Args: "llama3:latest"}},
  84. nil,
  85. },
  86. {
  87. "FROM llama3:7b-instruct-q4_K_M",
  88. []Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
  89. nil,
  90. },
  91. {
  92. "", nil, errMissingFrom,
  93. },
  94. {
  95. "PARAMETER param1 value1",
  96. nil,
  97. errMissingFrom,
  98. },
  99. {
  100. "PARAMETER param1 value1\nFROM foo",
  101. []Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
  102. nil,
  103. },
  104. {
  105. "PARAMETER what the \nFROM lemons make lemonade ",
  106. []Command{{Name: "what", Args: "the"}, {Name: "model", Args: "lemons make lemonade"}},
  107. nil,
  108. },
  109. }
  110. for _, c := range cases {
  111. t.Run("", func(t *testing.T) {
  112. modelfile, err := ParseFile(strings.NewReader(c.input))
  113. require.ErrorIs(t, err, c.err)
  114. if modelfile != nil {
  115. assert.Equal(t, c.expected, modelfile.Commands)
  116. }
  117. })
  118. }
  119. }
  120. func TestParseFileParametersMissingValue(t *testing.T) {
  121. input := `
  122. FROM foo
  123. PARAMETER param1
  124. `
  125. reader := strings.NewReader(input)
  126. _, err := ParseFile(reader)
  127. require.ErrorIs(t, err, io.ErrUnexpectedEOF)
  128. }
  129. func TestParseFileBadCommand(t *testing.T) {
  130. input := `
  131. FROM foo
  132. BADCOMMAND param1 value1
  133. `
  134. _, err := ParseFile(strings.NewReader(input))
  135. require.ErrorIs(t, err, errInvalidCommand)
  136. }
  137. func TestParseFileMessages(t *testing.T) {
  138. var cases = []struct {
  139. input string
  140. expected []Command
  141. err error
  142. }{
  143. {
  144. `
  145. FROM foo
  146. MESSAGE system You are a file parser. Always parse things.
  147. `,
  148. []Command{
  149. {Name: "model", Args: "foo"},
  150. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  151. },
  152. nil,
  153. },
  154. {
  155. `
  156. FROM foo
  157. MESSAGE system You are a file parser. Always parse things.`,
  158. []Command{
  159. {Name: "model", Args: "foo"},
  160. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  161. },
  162. nil,
  163. },
  164. {
  165. `
  166. FROM foo
  167. MESSAGE system You are a file parser. Always parse things.
  168. MESSAGE user Hey there!
  169. MESSAGE assistant Hello, I want to parse all the things!
  170. `,
  171. []Command{
  172. {Name: "model", Args: "foo"},
  173. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  174. {Name: "message", Args: "user: Hey there!"},
  175. {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
  176. },
  177. nil,
  178. },
  179. {
  180. `
  181. FROM foo
  182. MESSAGE system """
  183. You are a multiline file parser. Always parse things.
  184. """
  185. `,
  186. []Command{
  187. {Name: "model", Args: "foo"},
  188. {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
  189. },
  190. nil,
  191. },
  192. {
  193. `
  194. FROM foo
  195. MESSAGE badguy I'm a bad guy!
  196. `,
  197. nil,
  198. errInvalidMessageRole,
  199. },
  200. {
  201. `
  202. FROM foo
  203. MESSAGE system
  204. `,
  205. nil,
  206. io.ErrUnexpectedEOF,
  207. },
  208. {
  209. `
  210. FROM foo
  211. MESSAGE system`,
  212. nil,
  213. io.ErrUnexpectedEOF,
  214. },
  215. }
  216. for _, c := range cases {
  217. t.Run("", func(t *testing.T) {
  218. modelfile, err := ParseFile(strings.NewReader(c.input))
  219. require.ErrorIs(t, err, c.err)
  220. if modelfile != nil {
  221. assert.Equal(t, c.expected, modelfile.Commands)
  222. }
  223. })
  224. }
  225. }
  226. func TestParseFileQuoted(t *testing.T) {
  227. var cases = []struct {
  228. multiline string
  229. expected []Command
  230. err error
  231. }{
  232. {
  233. `
  234. FROM foo
  235. SYSTEM """
  236. This is a
  237. multiline system.
  238. """
  239. `,
  240. []Command{
  241. {Name: "model", Args: "foo"},
  242. {Name: "system", Args: "\nThis is a\nmultiline system.\n"},
  243. },
  244. nil,
  245. },
  246. {
  247. `
  248. FROM foo
  249. SYSTEM """
  250. This is a
  251. multiline system."""
  252. `,
  253. []Command{
  254. {Name: "model", Args: "foo"},
  255. {Name: "system", Args: "\nThis is a\nmultiline system."},
  256. },
  257. nil,
  258. },
  259. {
  260. `
  261. FROM foo
  262. SYSTEM """This is a
  263. multiline system."""
  264. `,
  265. []Command{
  266. {Name: "model", Args: "foo"},
  267. {Name: "system", Args: "This is a\nmultiline system."},
  268. },
  269. nil,
  270. },
  271. {
  272. `
  273. FROM foo
  274. SYSTEM """This is a multiline system."""
  275. `,
  276. []Command{
  277. {Name: "model", Args: "foo"},
  278. {Name: "system", Args: "This is a multiline system."},
  279. },
  280. nil,
  281. },
  282. {
  283. `
  284. FROM foo
  285. SYSTEM """This is a multiline system.""
  286. `,
  287. nil,
  288. io.ErrUnexpectedEOF,
  289. },
  290. {
  291. `
  292. FROM foo
  293. SYSTEM "
  294. `,
  295. nil,
  296. io.ErrUnexpectedEOF,
  297. },
  298. {
  299. `
  300. FROM foo
  301. SYSTEM """
  302. This is a multiline system with "quotes".
  303. """
  304. `,
  305. []Command{
  306. {Name: "model", Args: "foo"},
  307. {Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
  308. },
  309. nil,
  310. },
  311. {
  312. `
  313. FROM foo
  314. SYSTEM """"""
  315. `,
  316. []Command{
  317. {Name: "model", Args: "foo"},
  318. {Name: "system", Args: ""},
  319. },
  320. nil,
  321. },
  322. {
  323. `
  324. FROM foo
  325. SYSTEM ""
  326. `,
  327. []Command{
  328. {Name: "model", Args: "foo"},
  329. {Name: "system", Args: ""},
  330. },
  331. nil,
  332. },
  333. {
  334. `
  335. FROM foo
  336. SYSTEM "'"
  337. `,
  338. []Command{
  339. {Name: "model", Args: "foo"},
  340. {Name: "system", Args: "'"},
  341. },
  342. nil,
  343. },
  344. {
  345. `
  346. FROM foo
  347. SYSTEM """''"'""'""'"'''''""'""'"""
  348. `,
  349. []Command{
  350. {Name: "model", Args: "foo"},
  351. {Name: "system", Args: `''"'""'""'"'''''""'""'`},
  352. },
  353. nil,
  354. },
  355. {
  356. `
  357. FROM foo
  358. TEMPLATE """
  359. {{ .Prompt }}
  360. """`,
  361. []Command{
  362. {Name: "model", Args: "foo"},
  363. {Name: "template", Args: "\n{{ .Prompt }}\n"},
  364. },
  365. nil,
  366. },
  367. }
  368. for _, c := range cases {
  369. t.Run("", func(t *testing.T) {
  370. modelfile, err := ParseFile(strings.NewReader(c.multiline))
  371. require.ErrorIs(t, err, c.err)
  372. if modelfile != nil {
  373. assert.Equal(t, c.expected, modelfile.Commands)
  374. }
  375. })
  376. }
  377. }
  378. func TestParseFileParameters(t *testing.T) {
  379. var cases = map[string]struct {
  380. name, value string
  381. }{
  382. "numa true": {"numa", "true"},
  383. "num_ctx 1": {"num_ctx", "1"},
  384. "num_batch 1": {"num_batch", "1"},
  385. "num_gqa 1": {"num_gqa", "1"},
  386. "num_gpu 1": {"num_gpu", "1"},
  387. "main_gpu 1": {"main_gpu", "1"},
  388. "low_vram true": {"low_vram", "true"},
  389. "f16_kv true": {"f16_kv", "true"},
  390. "logits_all true": {"logits_all", "true"},
  391. "vocab_only true": {"vocab_only", "true"},
  392. "use_mmap true": {"use_mmap", "true"},
  393. "use_mlock true": {"use_mlock", "true"},
  394. "num_thread 1": {"num_thread", "1"},
  395. "num_keep 1": {"num_keep", "1"},
  396. "seed 1": {"seed", "1"},
  397. "num_predict 1": {"num_predict", "1"},
  398. "top_k 1": {"top_k", "1"},
  399. "top_p 1.0": {"top_p", "1.0"},
  400. "tfs_z 1.0": {"tfs_z", "1.0"},
  401. "typical_p 1.0": {"typical_p", "1.0"},
  402. "repeat_last_n 1": {"repeat_last_n", "1"},
  403. "temperature 1.0": {"temperature", "1.0"},
  404. "repeat_penalty 1.0": {"repeat_penalty", "1.0"},
  405. "presence_penalty 1.0": {"presence_penalty", "1.0"},
  406. "frequency_penalty 1.0": {"frequency_penalty", "1.0"},
  407. "mirostat 1": {"mirostat", "1"},
  408. "mirostat_tau 1.0": {"mirostat_tau", "1.0"},
  409. "mirostat_eta 1.0": {"mirostat_eta", "1.0"},
  410. "penalize_newline true": {"penalize_newline", "true"},
  411. "stop ### User:": {"stop", "### User:"},
  412. "stop ### User: ": {"stop", "### User:"},
  413. "stop \"### User:\"": {"stop", "### User:"},
  414. "stop \"### User: \"": {"stop", "### User: "},
  415. "stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
  416. "stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
  417. "stop <|endoftext|>": {"stop", "<|endoftext|>"},
  418. "stop <|eot_id|>": {"stop", "<|eot_id|>"},
  419. "stop </s>": {"stop", "</s>"},
  420. }
  421. for k, v := range cases {
  422. t.Run(k, func(t *testing.T) {
  423. var b bytes.Buffer
  424. fmt.Fprintln(&b, "FROM foo")
  425. fmt.Fprintln(&b, "PARAMETER", k)
  426. modelfile, err := ParseFile(&b)
  427. require.NoError(t, err)
  428. assert.Equal(t, []Command{
  429. {Name: "model", Args: "foo"},
  430. {Name: v.name, Args: v.value},
  431. }, modelfile.Commands)
  432. })
  433. }
  434. }
  435. func TestParseFileComments(t *testing.T) {
  436. var cases = []struct {
  437. input string
  438. expected []Command
  439. }{
  440. {
  441. `
  442. # comment
  443. FROM foo
  444. `,
  445. []Command{
  446. {Name: "model", Args: "foo"},
  447. },
  448. },
  449. }
  450. for _, c := range cases {
  451. t.Run("", func(t *testing.T) {
  452. modelfile, err := ParseFile(strings.NewReader(c.input))
  453. require.NoError(t, err)
  454. assert.Equal(t, c.expected, modelfile.Commands)
  455. })
  456. }
  457. }
  458. func TestParseFileFormatParseFile(t *testing.T) {
  459. var cases = []string{
  460. `
  461. FROM foo
  462. ADAPTER adapter1
  463. LICENSE MIT
  464. PARAMETER param1 value1
  465. PARAMETER param2 value2
  466. TEMPLATE template1
  467. MESSAGE system You are a file parser. Always parse things.
  468. MESSAGE user Hey there!
  469. MESSAGE assistant Hello, I want to parse all the things!
  470. `,
  471. `
  472. FROM foo
  473. ADAPTER adapter1
  474. LICENSE MIT
  475. PARAMETER param1 value1
  476. PARAMETER param2 value2
  477. TEMPLATE template1
  478. MESSAGE system """
  479. You are a store greeter. Always responsed with "Hello!".
  480. """
  481. MESSAGE user Hey there!
  482. MESSAGE assistant Hello, I want to parse all the things!
  483. `,
  484. `
  485. FROM foo
  486. ADAPTER adapter1
  487. LICENSE """
  488. Very long and boring legal text.
  489. Blah blah blah.
  490. "Oh look, a quote!"
  491. """
  492. PARAMETER param1 value1
  493. PARAMETER param2 value2
  494. TEMPLATE template1
  495. MESSAGE system """
  496. You are a store greeter. Always responsed with "Hello!".
  497. """
  498. MESSAGE user Hey there!
  499. MESSAGE assistant Hello, I want to parse all the things!
  500. `,
  501. `
  502. FROM foo
  503. SYSTEM ""
  504. `,
  505. }
  506. for _, c := range cases {
  507. t.Run("", func(t *testing.T) {
  508. modelfile, err := ParseFile(strings.NewReader(c))
  509. require.NoError(t, err)
  510. modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
  511. require.NoError(t, err)
  512. assert.Equal(t, modelfile, modelfile2)
  513. })
  514. }
  515. }
  516. func TestParseFileUTF16ParseFile(t *testing.T) {
  517. data := `FROM bob
  518. PARAMETER param1 1
  519. PARAMETER param2 4096
  520. SYSTEM You are a utf16 file.
  521. `
  522. expected := []Command{
  523. {Name: "model", Args: "bob"},
  524. {Name: "param1", Args: "1"},
  525. {Name: "param2", Args: "4096"},
  526. {Name: "system", Args: "You are a utf16 file."},
  527. }
  528. t.Run("le", func(t *testing.T) {
  529. var b bytes.Buffer
  530. require.NoError(t, binary.Write(&b, binary.LittleEndian, []byte{0xff, 0xfe}))
  531. require.NoError(t, binary.Write(&b, binary.LittleEndian, utf16.Encode([]rune(data))))
  532. actual, err := ParseFile(&b)
  533. require.NoError(t, err)
  534. assert.Equal(t, expected, actual.Commands)
  535. })
  536. t.Run("be", func(t *testing.T) {
  537. var b bytes.Buffer
  538. require.NoError(t, binary.Write(&b, binary.BigEndian, []byte{0xfe, 0xff}))
  539. require.NoError(t, binary.Write(&b, binary.BigEndian, utf16.Encode([]rune(data))))
  540. actual, err := ParseFile(&b)
  541. require.NoError(t, err)
  542. assert.Equal(t, expected, actual.Commands)
  543. })
  544. }
  545. func TestParseMultiByte(t *testing.T) {
  546. input := `FROM test
  547. SYSTEM 你好👋`
  548. expect := []Command{
  549. {Name: "model", Args: "test"},
  550. {Name: "system", Args: "你好👋"},
  551. }
  552. encodings := []encoding.Encoding{
  553. unicode.UTF8,
  554. unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
  555. unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
  556. }
  557. for _, encoding := range encodings {
  558. t.Run(fmt.Sprintf("%s", encoding), func(t *testing.T) {
  559. s, err := encoding.NewEncoder().String(input)
  560. require.NoError(t, err)
  561. actual, err := ParseFile(strings.NewReader(s))
  562. require.NoError(t, err)
  563. assert.Equal(t, expect, actual.Commands)
  564. })
  565. }
  566. }