parser_test.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. package parser
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "testing"
  9. "unicode/utf16"
  10. "github.com/stretchr/testify/assert"
  11. "github.com/stretchr/testify/require"
  12. "golang.org/x/text/encoding"
  13. "golang.org/x/text/encoding/unicode"
  14. )
  15. func TestParseFileFile(t *testing.T) {
  16. input := `
  17. FROM model1
  18. ADAPTER adapter1
  19. LICENSE MIT
  20. PARAMETER param1 value1
  21. PARAMETER param2 value2
  22. TEMPLATE template1
  23. `
  24. reader := strings.NewReader(input)
  25. modelfile, err := ParseFile(reader)
  26. require.NoError(t, err)
  27. expectedCommands := []Command{
  28. {Name: "model", Args: "model1"},
  29. {Name: "adapter", Args: "adapter1"},
  30. {Name: "license", Args: "MIT"},
  31. {Name: "param1", Args: "value1"},
  32. {Name: "param2", Args: "value2"},
  33. {Name: "template", Args: "template1"},
  34. }
  35. assert.Equal(t, expectedCommands, modelfile.Commands)
  36. }
  37. func TestParseFileFrom(t *testing.T) {
  38. var cases = []struct {
  39. input string
  40. expected []Command
  41. err error
  42. }{
  43. {
  44. "FROM \"FOO BAR \"",
  45. []Command{{Name: "model", Args: "FOO BAR "}},
  46. nil,
  47. },
  48. {
  49. "FROM \"FOO BAR\"\nPARAMETER param1 value1",
  50. []Command{{Name: "model", Args: "FOO BAR"}, {Name: "param1", Args: "value1"}},
  51. nil,
  52. },
  53. {
  54. "FROM FOOO BAR ",
  55. []Command{{Name: "model", Args: "FOOO BAR"}},
  56. nil,
  57. },
  58. {
  59. "FROM /what/is/the path ",
  60. []Command{{Name: "model", Args: "/what/is/the path"}},
  61. nil,
  62. },
  63. {
  64. "FROM foo",
  65. []Command{{Name: "model", Args: "foo"}},
  66. nil,
  67. },
  68. {
  69. "FROM /path/to/model",
  70. []Command{{Name: "model", Args: "/path/to/model"}},
  71. nil,
  72. },
  73. {
  74. "FROM /path/to/model/fp16.bin",
  75. []Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
  76. nil,
  77. },
  78. {
  79. "FROM llama3:latest",
  80. []Command{{Name: "model", Args: "llama3:latest"}},
  81. nil,
  82. },
  83. {
  84. "FROM llama3:7b-instruct-q4_K_M",
  85. []Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
  86. nil,
  87. },
  88. {
  89. "", nil, errMissingFrom,
  90. },
  91. {
  92. "PARAMETER param1 value1",
  93. nil,
  94. errMissingFrom,
  95. },
  96. {
  97. "PARAMETER param1 value1\nFROM foo",
  98. []Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
  99. nil,
  100. },
  101. {
  102. "PARAMETER what the \nFROM lemons make lemonade ",
  103. []Command{{Name: "what", Args: "the"}, {Name: "model", Args: "lemons make lemonade"}},
  104. nil,
  105. },
  106. }
  107. for _, c := range cases {
  108. t.Run("", func(t *testing.T) {
  109. modelfile, err := ParseFile(strings.NewReader(c.input))
  110. require.ErrorIs(t, err, c.err)
  111. if modelfile != nil {
  112. assert.Equal(t, c.expected, modelfile.Commands)
  113. }
  114. })
  115. }
  116. }
  117. func TestParseFileParametersMissingValue(t *testing.T) {
  118. input := `
  119. FROM foo
  120. PARAMETER param1
  121. `
  122. reader := strings.NewReader(input)
  123. _, err := ParseFile(reader)
  124. require.ErrorIs(t, err, io.ErrUnexpectedEOF)
  125. }
  126. func TestParseFileBadCommand(t *testing.T) {
  127. input := `
  128. FROM foo
  129. BADCOMMAND param1 value1
  130. `
  131. _, err := ParseFile(strings.NewReader(input))
  132. require.ErrorIs(t, err, errInvalidCommand)
  133. }
  134. func TestParseFileMessages(t *testing.T) {
  135. var cases = []struct {
  136. input string
  137. expected []Command
  138. err error
  139. }{
  140. {
  141. `
  142. FROM foo
  143. MESSAGE system You are a file parser. Always parse things.
  144. `,
  145. []Command{
  146. {Name: "model", Args: "foo"},
  147. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  148. },
  149. nil,
  150. },
  151. {
  152. `
  153. FROM foo
  154. MESSAGE system You are a file parser. Always parse things.`,
  155. []Command{
  156. {Name: "model", Args: "foo"},
  157. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  158. },
  159. nil,
  160. },
  161. {
  162. `
  163. FROM foo
  164. MESSAGE system You are a file parser. Always parse things.
  165. MESSAGE user Hey there!
  166. MESSAGE assistant Hello, I want to parse all the things!
  167. `,
  168. []Command{
  169. {Name: "model", Args: "foo"},
  170. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  171. {Name: "message", Args: "user: Hey there!"},
  172. {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
  173. },
  174. nil,
  175. },
  176. {
  177. `
  178. FROM foo
  179. MESSAGE system """
  180. You are a multiline file parser. Always parse things.
  181. """
  182. `,
  183. []Command{
  184. {Name: "model", Args: "foo"},
  185. {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
  186. },
  187. nil,
  188. },
  189. {
  190. `
  191. FROM foo
  192. MESSAGE badguy I'm a bad guy!
  193. `,
  194. nil,
  195. errInvalidMessageRole,
  196. },
  197. {
  198. `
  199. FROM foo
  200. MESSAGE system
  201. `,
  202. nil,
  203. io.ErrUnexpectedEOF,
  204. },
  205. {
  206. `
  207. FROM foo
  208. MESSAGE system`,
  209. nil,
  210. io.ErrUnexpectedEOF,
  211. },
  212. }
  213. for _, c := range cases {
  214. t.Run("", func(t *testing.T) {
  215. modelfile, err := ParseFile(strings.NewReader(c.input))
  216. require.ErrorIs(t, err, c.err)
  217. if modelfile != nil {
  218. assert.Equal(t, c.expected, modelfile.Commands)
  219. }
  220. })
  221. }
  222. }
  223. func TestParseFileQuoted(t *testing.T) {
  224. var cases = []struct {
  225. multiline string
  226. expected []Command
  227. err error
  228. }{
  229. {
  230. `
  231. FROM foo
  232. SYSTEM """
  233. This is a
  234. multiline system.
  235. """
  236. `,
  237. []Command{
  238. {Name: "model", Args: "foo"},
  239. {Name: "system", Args: "\nThis is a\nmultiline system.\n"},
  240. },
  241. nil,
  242. },
  243. {
  244. `
  245. FROM foo
  246. SYSTEM """
  247. This is a
  248. multiline system."""
  249. `,
  250. []Command{
  251. {Name: "model", Args: "foo"},
  252. {Name: "system", Args: "\nThis is a\nmultiline system."},
  253. },
  254. nil,
  255. },
  256. {
  257. `
  258. FROM foo
  259. SYSTEM """This is a
  260. multiline system."""
  261. `,
  262. []Command{
  263. {Name: "model", Args: "foo"},
  264. {Name: "system", Args: "This is a\nmultiline system."},
  265. },
  266. nil,
  267. },
  268. {
  269. `
  270. FROM foo
  271. SYSTEM """This is a multiline system."""
  272. `,
  273. []Command{
  274. {Name: "model", Args: "foo"},
  275. {Name: "system", Args: "This is a multiline system."},
  276. },
  277. nil,
  278. },
  279. {
  280. `
  281. FROM foo
  282. SYSTEM """This is a multiline system.""
  283. `,
  284. nil,
  285. io.ErrUnexpectedEOF,
  286. },
  287. {
  288. `
  289. FROM foo
  290. SYSTEM "
  291. `,
  292. nil,
  293. io.ErrUnexpectedEOF,
  294. },
  295. {
  296. `
  297. FROM foo
  298. SYSTEM """
  299. This is a multiline system with "quotes".
  300. """
  301. `,
  302. []Command{
  303. {Name: "model", Args: "foo"},
  304. {Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
  305. },
  306. nil,
  307. },
  308. {
  309. `
  310. FROM foo
  311. SYSTEM """"""
  312. `,
  313. []Command{
  314. {Name: "model", Args: "foo"},
  315. {Name: "system", Args: ""},
  316. },
  317. nil,
  318. },
  319. {
  320. `
  321. FROM foo
  322. SYSTEM ""
  323. `,
  324. []Command{
  325. {Name: "model", Args: "foo"},
  326. {Name: "system", Args: ""},
  327. },
  328. nil,
  329. },
  330. {
  331. `
  332. FROM foo
  333. SYSTEM "'"
  334. `,
  335. []Command{
  336. {Name: "model", Args: "foo"},
  337. {Name: "system", Args: "'"},
  338. },
  339. nil,
  340. },
  341. {
  342. `
  343. FROM foo
  344. SYSTEM """''"'""'""'"'''''""'""'"""
  345. `,
  346. []Command{
  347. {Name: "model", Args: "foo"},
  348. {Name: "system", Args: `''"'""'""'"'''''""'""'`},
  349. },
  350. nil,
  351. },
  352. {
  353. `
  354. FROM foo
  355. TEMPLATE """
  356. {{ .Prompt }}
  357. """`,
  358. []Command{
  359. {Name: "model", Args: "foo"},
  360. {Name: "template", Args: "\n{{ .Prompt }}\n"},
  361. },
  362. nil,
  363. },
  364. }
  365. for _, c := range cases {
  366. t.Run("", func(t *testing.T) {
  367. modelfile, err := ParseFile(strings.NewReader(c.multiline))
  368. require.ErrorIs(t, err, c.err)
  369. if modelfile != nil {
  370. assert.Equal(t, c.expected, modelfile.Commands)
  371. }
  372. })
  373. }
  374. }
  375. func TestParseFileParameters(t *testing.T) {
  376. var cases = map[string]struct {
  377. name, value string
  378. }{
  379. "numa true": {"numa", "true"},
  380. "num_ctx 1": {"num_ctx", "1"},
  381. "num_batch 1": {"num_batch", "1"},
  382. "num_gqa 1": {"num_gqa", "1"},
  383. "num_gpu 1": {"num_gpu", "1"},
  384. "main_gpu 1": {"main_gpu", "1"},
  385. "low_vram true": {"low_vram", "true"},
  386. "f16_kv true": {"f16_kv", "true"},
  387. "logits_all true": {"logits_all", "true"},
  388. "vocab_only true": {"vocab_only", "true"},
  389. "use_mmap true": {"use_mmap", "true"},
  390. "use_mlock true": {"use_mlock", "true"},
  391. "num_thread 1": {"num_thread", "1"},
  392. "num_keep 1": {"num_keep", "1"},
  393. "seed 1": {"seed", "1"},
  394. "num_predict 1": {"num_predict", "1"},
  395. "top_k 1": {"top_k", "1"},
  396. "top_p 1.0": {"top_p", "1.0"},
  397. "tfs_z 1.0": {"tfs_z", "1.0"},
  398. "typical_p 1.0": {"typical_p", "1.0"},
  399. "repeat_last_n 1": {"repeat_last_n", "1"},
  400. "temperature 1.0": {"temperature", "1.0"},
  401. "repeat_penalty 1.0": {"repeat_penalty", "1.0"},
  402. "presence_penalty 1.0": {"presence_penalty", "1.0"},
  403. "frequency_penalty 1.0": {"frequency_penalty", "1.0"},
  404. "mirostat 1": {"mirostat", "1"},
  405. "mirostat_tau 1.0": {"mirostat_tau", "1.0"},
  406. "mirostat_eta 1.0": {"mirostat_eta", "1.0"},
  407. "penalize_newline true": {"penalize_newline", "true"},
  408. "stop ### User:": {"stop", "### User:"},
  409. "stop ### User: ": {"stop", "### User:"},
  410. "stop \"### User:\"": {"stop", "### User:"},
  411. "stop \"### User: \"": {"stop", "### User: "},
  412. "stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
  413. "stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
  414. "stop <|endoftext|>": {"stop", "<|endoftext|>"},
  415. "stop <|eot_id|>": {"stop", "<|eot_id|>"},
  416. "stop </s>": {"stop", "</s>"},
  417. }
  418. for k, v := range cases {
  419. t.Run(k, func(t *testing.T) {
  420. var b bytes.Buffer
  421. fmt.Fprintln(&b, "FROM foo")
  422. fmt.Fprintln(&b, "PARAMETER", k)
  423. modelfile, err := ParseFile(&b)
  424. require.NoError(t, err)
  425. assert.Equal(t, []Command{
  426. {Name: "model", Args: "foo"},
  427. {Name: v.name, Args: v.value},
  428. }, modelfile.Commands)
  429. })
  430. }
  431. }
  432. func TestParseFileComments(t *testing.T) {
  433. var cases = []struct {
  434. input string
  435. expected []Command
  436. }{
  437. {
  438. `
  439. # comment
  440. FROM foo
  441. `,
  442. []Command{
  443. {Name: "model", Args: "foo"},
  444. },
  445. },
  446. }
  447. for _, c := range cases {
  448. t.Run("", func(t *testing.T) {
  449. modelfile, err := ParseFile(strings.NewReader(c.input))
  450. require.NoError(t, err)
  451. assert.Equal(t, c.expected, modelfile.Commands)
  452. })
  453. }
  454. }
  455. func TestParseFileFormatParseFile(t *testing.T) {
  456. var cases = []string{
  457. `
  458. FROM foo
  459. ADAPTER adapter1
  460. LICENSE MIT
  461. PARAMETER param1 value1
  462. PARAMETER param2 value2
  463. TEMPLATE template1
  464. MESSAGE system You are a file parser. Always parse things.
  465. MESSAGE user Hey there!
  466. MESSAGE assistant Hello, I want to parse all the things!
  467. `,
  468. `
  469. FROM foo
  470. ADAPTER adapter1
  471. LICENSE MIT
  472. PARAMETER param1 value1
  473. PARAMETER param2 value2
  474. TEMPLATE template1
  475. MESSAGE system """
  476. You are a store greeter. Always responsed with "Hello!".
  477. """
  478. MESSAGE user Hey there!
  479. MESSAGE assistant Hello, I want to parse all the things!
  480. `,
  481. `
  482. FROM foo
  483. ADAPTER adapter1
  484. LICENSE """
  485. Very long and boring legal text.
  486. Blah blah blah.
  487. "Oh look, a quote!"
  488. """
  489. PARAMETER param1 value1
  490. PARAMETER param2 value2
  491. TEMPLATE template1
  492. MESSAGE system """
  493. You are a store greeter. Always responsed with "Hello!".
  494. """
  495. MESSAGE user Hey there!
  496. MESSAGE assistant Hello, I want to parse all the things!
  497. `,
  498. `
  499. FROM foo
  500. SYSTEM ""
  501. `,
  502. }
  503. for _, c := range cases {
  504. t.Run("", func(t *testing.T) {
  505. modelfile, err := ParseFile(strings.NewReader(c))
  506. require.NoError(t, err)
  507. modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
  508. require.NoError(t, err)
  509. assert.Equal(t, modelfile, modelfile2)
  510. })
  511. }
  512. }
  513. func TestParseFileUTF16ParseFile(t *testing.T) {
  514. data := `FROM bob
  515. PARAMETER param1 1
  516. PARAMETER param2 4096
  517. SYSTEM You are a utf16 file.
  518. `
  519. expected := []Command{
  520. {Name: "model", Args: "bob"},
  521. {Name: "param1", Args: "1"},
  522. {Name: "param2", Args: "4096"},
  523. {Name: "system", Args: "You are a utf16 file."},
  524. }
  525. t.Run("le", func(t *testing.T) {
  526. var b bytes.Buffer
  527. require.NoError(t, binary.Write(&b, binary.LittleEndian, []byte{0xff, 0xfe}))
  528. require.NoError(t, binary.Write(&b, binary.LittleEndian, utf16.Encode([]rune(data))))
  529. actual, err := ParseFile(&b)
  530. require.NoError(t, err)
  531. assert.Equal(t, expected, actual.Commands)
  532. })
  533. t.Run("be", func(t *testing.T) {
  534. var b bytes.Buffer
  535. require.NoError(t, binary.Write(&b, binary.BigEndian, []byte{0xfe, 0xff}))
  536. require.NoError(t, binary.Write(&b, binary.BigEndian, utf16.Encode([]rune(data))))
  537. actual, err := ParseFile(&b)
  538. require.NoError(t, err)
  539. assert.Equal(t, expected, actual.Commands)
  540. })
  541. }
  542. func TestParseMultiByte(t *testing.T) {
  543. input := `FROM test
  544. SYSTEM 你好👋`
  545. expect := []Command{
  546. {Name: "model", Args: "test"},
  547. {Name: "system", Args: "你好👋"},
  548. }
  549. encodings := []encoding.Encoding{
  550. unicode.UTF8,
  551. unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
  552. unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
  553. }
  554. for _, encoding := range encodings {
  555. t.Run(fmt.Sprintf("%s", encoding), func(t *testing.T) {
  556. s, err := encoding.NewEncoder().String(input)
  557. require.NoError(t, err)
  558. actual, err := ParseFile(strings.NewReader(s))
  559. require.NoError(t, err)
  560. assert.Equal(t, expect, actual.Commands)
  561. })
  562. }
  563. }