parser_test.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. package parser
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "testing"
  9. "unicode/utf16"
  10. "github.com/stretchr/testify/assert"
  11. "github.com/stretchr/testify/require"
  12. "golang.org/x/text/encoding"
  13. "golang.org/x/text/encoding/unicode"
  14. )
  15. func TestParseFileFile(t *testing.T) {
  16. input := `
  17. FROM model1
  18. ADAPTER adapter1
  19. LICENSE MIT
  20. PARAMETER param1 value1
  21. PARAMETER param2 value2
  22. TEMPLATE template1
  23. `
  24. reader := strings.NewReader(input)
  25. modelfile, err := ParseFile(reader)
  26. require.NoError(t, err)
  27. expectedCommands := []Command{
  28. {Name: "model", Args: "model1"},
  29. {Name: "adapter", Args: "adapter1"},
  30. {Name: "license", Args: "MIT"},
  31. {Name: "param1", Args: "value1"},
  32. {Name: "param2", Args: "value2"},
  33. {Name: "template", Args: "template1"},
  34. }
  35. assert.Equal(t, expectedCommands, modelfile.Commands)
  36. }
  37. func TestParseFileFrom(t *testing.T) {
  38. var cases = []struct {
  39. input string
  40. expected []Command
  41. err error
  42. }{
  43. {
  44. "FROM foo",
  45. []Command{{Name: "model", Args: "foo"}},
  46. nil,
  47. },
  48. {
  49. "FROM /path/to/model",
  50. []Command{{Name: "model", Args: "/path/to/model"}},
  51. nil,
  52. },
  53. {
  54. "FROM /path/to/model/fp16.bin",
  55. []Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
  56. nil,
  57. },
  58. {
  59. "FROM llama3:latest",
  60. []Command{{Name: "model", Args: "llama3:latest"}},
  61. nil,
  62. },
  63. {
  64. "FROM llama3:7b-instruct-q4_K_M",
  65. []Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
  66. nil,
  67. },
  68. {
  69. "", nil, errMissingFrom,
  70. },
  71. {
  72. "PARAMETER param1 value1",
  73. nil,
  74. errMissingFrom,
  75. },
  76. {
  77. "PARAMETER param1 value1\nFROM foo",
  78. []Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
  79. nil,
  80. },
  81. }
  82. for _, c := range cases {
  83. t.Run("", func(t *testing.T) {
  84. modelfile, err := ParseFile(strings.NewReader(c.input))
  85. require.ErrorIs(t, err, c.err)
  86. if modelfile != nil {
  87. assert.Equal(t, c.expected, modelfile.Commands)
  88. }
  89. })
  90. }
  91. }
  92. func TestParseFileParametersMissingValue(t *testing.T) {
  93. input := `
  94. FROM foo
  95. PARAMETER param1
  96. `
  97. reader := strings.NewReader(input)
  98. _, err := ParseFile(reader)
  99. require.ErrorIs(t, err, io.ErrUnexpectedEOF)
  100. }
  101. func TestParseFileBadCommand(t *testing.T) {
  102. input := `
  103. FROM foo
  104. BADCOMMAND param1 value1
  105. `
  106. _, err := ParseFile(strings.NewReader(input))
  107. require.ErrorIs(t, err, errInvalidCommand)
  108. }
  109. func TestParseFileMessages(t *testing.T) {
  110. var cases = []struct {
  111. input string
  112. expected []Command
  113. err error
  114. }{
  115. {
  116. `
  117. FROM foo
  118. MESSAGE system You are a file parser. Always parse things.
  119. `,
  120. []Command{
  121. {Name: "model", Args: "foo"},
  122. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  123. },
  124. nil,
  125. },
  126. {
  127. `
  128. FROM foo
  129. MESSAGE system You are a file parser. Always parse things.`,
  130. []Command{
  131. {Name: "model", Args: "foo"},
  132. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  133. },
  134. nil,
  135. },
  136. {
  137. `
  138. FROM foo
  139. MESSAGE system You are a file parser. Always parse things.
  140. MESSAGE user Hey there!
  141. MESSAGE assistant Hello, I want to parse all the things!
  142. `,
  143. []Command{
  144. {Name: "model", Args: "foo"},
  145. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  146. {Name: "message", Args: "user: Hey there!"},
  147. {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
  148. },
  149. nil,
  150. },
  151. {
  152. `
  153. FROM foo
  154. MESSAGE system """
  155. You are a multiline file parser. Always parse things.
  156. """
  157. `,
  158. []Command{
  159. {Name: "model", Args: "foo"},
  160. {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
  161. },
  162. nil,
  163. },
  164. {
  165. `
  166. FROM foo
  167. MESSAGE badguy I'm a bad guy!
  168. `,
  169. nil,
  170. errInvalidMessageRole,
  171. },
  172. {
  173. `
  174. FROM foo
  175. MESSAGE system
  176. `,
  177. nil,
  178. io.ErrUnexpectedEOF,
  179. },
  180. {
  181. `
  182. FROM foo
  183. MESSAGE system`,
  184. nil,
  185. io.ErrUnexpectedEOF,
  186. },
  187. }
  188. for _, c := range cases {
  189. t.Run("", func(t *testing.T) {
  190. modelfile, err := ParseFile(strings.NewReader(c.input))
  191. require.ErrorIs(t, err, c.err)
  192. if modelfile != nil {
  193. assert.Equal(t, c.expected, modelfile.Commands)
  194. }
  195. })
  196. }
  197. }
  198. func TestParseFileQuoted(t *testing.T) {
  199. var cases = []struct {
  200. multiline string
  201. expected []Command
  202. err error
  203. }{
  204. {
  205. `
  206. FROM foo
  207. SYSTEM """
  208. This is a
  209. multiline system.
  210. """
  211. `,
  212. []Command{
  213. {Name: "model", Args: "foo"},
  214. {Name: "system", Args: "\nThis is a\nmultiline system.\n"},
  215. },
  216. nil,
  217. },
  218. {
  219. `
  220. FROM foo
  221. SYSTEM """
  222. This is a
  223. multiline system."""
  224. `,
  225. []Command{
  226. {Name: "model", Args: "foo"},
  227. {Name: "system", Args: "\nThis is a\nmultiline system."},
  228. },
  229. nil,
  230. },
  231. {
  232. `
  233. FROM foo
  234. SYSTEM """This is a
  235. multiline system."""
  236. `,
  237. []Command{
  238. {Name: "model", Args: "foo"},
  239. {Name: "system", Args: "This is a\nmultiline system."},
  240. },
  241. nil,
  242. },
  243. {
  244. `
  245. FROM foo
  246. SYSTEM """This is a multiline system."""
  247. `,
  248. []Command{
  249. {Name: "model", Args: "foo"},
  250. {Name: "system", Args: "This is a multiline system."},
  251. },
  252. nil,
  253. },
  254. {
  255. `
  256. FROM foo
  257. SYSTEM """This is a multiline system.""
  258. `,
  259. nil,
  260. io.ErrUnexpectedEOF,
  261. },
  262. {
  263. `
  264. FROM foo
  265. SYSTEM "
  266. `,
  267. nil,
  268. io.ErrUnexpectedEOF,
  269. },
  270. {
  271. `
  272. FROM foo
  273. SYSTEM """
  274. This is a multiline system with "quotes".
  275. """
  276. `,
  277. []Command{
  278. {Name: "model", Args: "foo"},
  279. {Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
  280. },
  281. nil,
  282. },
  283. {
  284. `
  285. FROM foo
  286. SYSTEM """"""
  287. `,
  288. []Command{
  289. {Name: "model", Args: "foo"},
  290. {Name: "system", Args: ""},
  291. },
  292. nil,
  293. },
  294. {
  295. `
  296. FROM foo
  297. SYSTEM ""
  298. `,
  299. []Command{
  300. {Name: "model", Args: "foo"},
  301. {Name: "system", Args: ""},
  302. },
  303. nil,
  304. },
  305. {
  306. `
  307. FROM foo
  308. SYSTEM "'"
  309. `,
  310. []Command{
  311. {Name: "model", Args: "foo"},
  312. {Name: "system", Args: "'"},
  313. },
  314. nil,
  315. },
  316. {
  317. `
  318. FROM foo
  319. SYSTEM """''"'""'""'"'''''""'""'"""
  320. `,
  321. []Command{
  322. {Name: "model", Args: "foo"},
  323. {Name: "system", Args: `''"'""'""'"'''''""'""'`},
  324. },
  325. nil,
  326. },
  327. {
  328. `
  329. FROM foo
  330. TEMPLATE """
  331. {{ .Prompt }}
  332. """`,
  333. []Command{
  334. {Name: "model", Args: "foo"},
  335. {Name: "template", Args: "\n{{ .Prompt }}\n"},
  336. },
  337. nil,
  338. },
  339. }
  340. for _, c := range cases {
  341. t.Run("", func(t *testing.T) {
  342. modelfile, err := ParseFile(strings.NewReader(c.multiline))
  343. require.ErrorIs(t, err, c.err)
  344. if modelfile != nil {
  345. assert.Equal(t, c.expected, modelfile.Commands)
  346. }
  347. })
  348. }
  349. }
  350. func TestParseFileParameters(t *testing.T) {
  351. var cases = map[string]struct {
  352. name, value string
  353. }{
  354. "numa true": {"numa", "true"},
  355. "num_ctx 1": {"num_ctx", "1"},
  356. "num_batch 1": {"num_batch", "1"},
  357. "num_gqa 1": {"num_gqa", "1"},
  358. "num_gpu 1": {"num_gpu", "1"},
  359. "main_gpu 1": {"main_gpu", "1"},
  360. "low_vram true": {"low_vram", "true"},
  361. "f16_kv true": {"f16_kv", "true"},
  362. "logits_all true": {"logits_all", "true"},
  363. "vocab_only true": {"vocab_only", "true"},
  364. "use_mmap true": {"use_mmap", "true"},
  365. "use_mlock true": {"use_mlock", "true"},
  366. "num_thread 1": {"num_thread", "1"},
  367. "num_keep 1": {"num_keep", "1"},
  368. "seed 1": {"seed", "1"},
  369. "num_predict 1": {"num_predict", "1"},
  370. "top_k 1": {"top_k", "1"},
  371. "top_p 1.0": {"top_p", "1.0"},
  372. "tfs_z 1.0": {"tfs_z", "1.0"},
  373. "typical_p 1.0": {"typical_p", "1.0"},
  374. "repeat_last_n 1": {"repeat_last_n", "1"},
  375. "temperature 1.0": {"temperature", "1.0"},
  376. "repeat_penalty 1.0": {"repeat_penalty", "1.0"},
  377. "presence_penalty 1.0": {"presence_penalty", "1.0"},
  378. "frequency_penalty 1.0": {"frequency_penalty", "1.0"},
  379. "mirostat 1": {"mirostat", "1"},
  380. "mirostat_tau 1.0": {"mirostat_tau", "1.0"},
  381. "mirostat_eta 1.0": {"mirostat_eta", "1.0"},
  382. "penalize_newline true": {"penalize_newline", "true"},
  383. "stop ### User:": {"stop", "### User:"},
  384. "stop ### User: ": {"stop", "### User: "},
  385. "stop \"### User:\"": {"stop", "### User:"},
  386. "stop \"### User: \"": {"stop", "### User: "},
  387. "stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
  388. "stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
  389. "stop <|endoftext|>": {"stop", "<|endoftext|>"},
  390. "stop <|eot_id|>": {"stop", "<|eot_id|>"},
  391. "stop </s>": {"stop", "</s>"},
  392. }
  393. for k, v := range cases {
  394. t.Run(k, func(t *testing.T) {
  395. var b bytes.Buffer
  396. fmt.Fprintln(&b, "FROM foo")
  397. fmt.Fprintln(&b, "PARAMETER", k)
  398. modelfile, err := ParseFile(&b)
  399. require.NoError(t, err)
  400. assert.Equal(t, []Command{
  401. {Name: "model", Args: "foo"},
  402. {Name: v.name, Args: v.value},
  403. }, modelfile.Commands)
  404. })
  405. }
  406. }
  407. func TestParseFileComments(t *testing.T) {
  408. var cases = []struct {
  409. input string
  410. expected []Command
  411. }{
  412. {
  413. `
  414. # comment
  415. FROM foo
  416. `,
  417. []Command{
  418. {Name: "model", Args: "foo"},
  419. },
  420. },
  421. }
  422. for _, c := range cases {
  423. t.Run("", func(t *testing.T) {
  424. modelfile, err := ParseFile(strings.NewReader(c.input))
  425. require.NoError(t, err)
  426. assert.Equal(t, c.expected, modelfile.Commands)
  427. })
  428. }
  429. }
  430. func TestParseFileFormatParseFile(t *testing.T) {
  431. var cases = []string{
  432. `
  433. FROM foo
  434. ADAPTER adapter1
  435. LICENSE MIT
  436. PARAMETER param1 value1
  437. PARAMETER param2 value2
  438. TEMPLATE template1
  439. MESSAGE system You are a file parser. Always parse things.
  440. MESSAGE user Hey there!
  441. MESSAGE assistant Hello, I want to parse all the things!
  442. `,
  443. `
  444. FROM foo
  445. ADAPTER adapter1
  446. LICENSE MIT
  447. PARAMETER param1 value1
  448. PARAMETER param2 value2
  449. TEMPLATE template1
  450. MESSAGE system """
  451. You are a store greeter. Always responsed with "Hello!".
  452. """
  453. MESSAGE user Hey there!
  454. MESSAGE assistant Hello, I want to parse all the things!
  455. `,
  456. `
  457. FROM foo
  458. ADAPTER adapter1
  459. LICENSE """
  460. Very long and boring legal text.
  461. Blah blah blah.
  462. "Oh look, a quote!"
  463. """
  464. PARAMETER param1 value1
  465. PARAMETER param2 value2
  466. TEMPLATE template1
  467. MESSAGE system """
  468. You are a store greeter. Always responsed with "Hello!".
  469. """
  470. MESSAGE user Hey there!
  471. MESSAGE assistant Hello, I want to parse all the things!
  472. `,
  473. `
  474. FROM foo
  475. SYSTEM ""
  476. `,
  477. }
  478. for _, c := range cases {
  479. t.Run("", func(t *testing.T) {
  480. modelfile, err := ParseFile(strings.NewReader(c))
  481. require.NoError(t, err)
  482. modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
  483. require.NoError(t, err)
  484. assert.Equal(t, modelfile, modelfile2)
  485. })
  486. }
  487. }
  488. func TestParseFileUTF16ParseFile(t *testing.T) {
  489. data := `FROM bob
  490. PARAMETER param1 1
  491. PARAMETER param2 4096
  492. SYSTEM You are a utf16 file.
  493. `
  494. expected := []Command{
  495. {Name: "model", Args: "bob"},
  496. {Name: "param1", Args: "1"},
  497. {Name: "param2", Args: "4096"},
  498. {Name: "system", Args: "You are a utf16 file."},
  499. }
  500. t.Run("le", func(t *testing.T) {
  501. var b bytes.Buffer
  502. require.NoError(t, binary.Write(&b, binary.LittleEndian, []byte{0xff, 0xfe}))
  503. require.NoError(t, binary.Write(&b, binary.LittleEndian, utf16.Encode([]rune(data))))
  504. actual, err := ParseFile(&b)
  505. require.NoError(t, err)
  506. assert.Equal(t, expected, actual.Commands)
  507. })
  508. t.Run("be", func(t *testing.T) {
  509. var b bytes.Buffer
  510. require.NoError(t, binary.Write(&b, binary.BigEndian, []byte{0xfe, 0xff}))
  511. require.NoError(t, binary.Write(&b, binary.BigEndian, utf16.Encode([]rune(data))))
  512. actual, err := ParseFile(&b)
  513. require.NoError(t, err)
  514. assert.Equal(t, expected, actual.Commands)
  515. })
  516. }
  517. func TestParseMultiByte(t *testing.T) {
  518. input := `FROM test
  519. SYSTEM 你好👋`
  520. expect := []Command{
  521. {Name: "model", Args: "test"},
  522. {Name: "system", Args: "你好👋"},
  523. }
  524. encodings := []encoding.Encoding{
  525. unicode.UTF8,
  526. unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
  527. unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
  528. }
  529. for _, encoding := range encodings {
  530. t.Run(fmt.Sprintf("%s", encoding), func(t *testing.T) {
  531. s, err := encoding.NewEncoder().String(input)
  532. require.NoError(t, err)
  533. actual, err := ParseFile(strings.NewReader(s))
  534. require.NoError(t, err)
  535. assert.Equal(t, expect, actual.Commands)
  536. })
  537. }
  538. }