parser_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. package parser
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "testing"
  9. "unicode/utf16"
  10. "github.com/stretchr/testify/assert"
  11. )
  12. func TestParseFileFile(t *testing.T) {
  13. input := `
  14. FROM model1
  15. ADAPTER adapter1
  16. LICENSE MIT
  17. PARAMETER param1 value1
  18. PARAMETER param2 value2
  19. TEMPLATE template1
  20. `
  21. reader := strings.NewReader(input)
  22. modelfile, err := ParseFile(reader)
  23. assert.NoError(t, err)
  24. expectedCommands := []Command{
  25. {Name: "model", Args: "model1"},
  26. {Name: "adapter", Args: "adapter1"},
  27. {Name: "license", Args: "MIT"},
  28. {Name: "param1", Args: "value1"},
  29. {Name: "param2", Args: "value2"},
  30. {Name: "template", Args: "template1"},
  31. }
  32. assert.Equal(t, expectedCommands, modelfile.Commands)
  33. }
  34. func TestParseFileFrom(t *testing.T) {
  35. var cases = []struct {
  36. input string
  37. expected []Command
  38. err error
  39. }{
  40. {
  41. "FROM foo",
  42. []Command{{Name: "model", Args: "foo"}},
  43. nil,
  44. },
  45. {
  46. "FROM /path/to/model",
  47. []Command{{Name: "model", Args: "/path/to/model"}},
  48. nil,
  49. },
  50. {
  51. "FROM /path/to/model/fp16.bin",
  52. []Command{{Name: "model", Args: "/path/to/model/fp16.bin"}},
  53. nil,
  54. },
  55. {
  56. "FROM llama3:latest",
  57. []Command{{Name: "model", Args: "llama3:latest"}},
  58. nil,
  59. },
  60. {
  61. "FROM llama3:7b-instruct-q4_K_M",
  62. []Command{{Name: "model", Args: "llama3:7b-instruct-q4_K_M"}},
  63. nil,
  64. },
  65. {
  66. "", nil, errMissingFrom,
  67. },
  68. {
  69. "PARAMETER param1 value1",
  70. nil,
  71. errMissingFrom,
  72. },
  73. {
  74. "PARAMETER param1 value1\nFROM foo",
  75. []Command{{Name: "param1", Args: "value1"}, {Name: "model", Args: "foo"}},
  76. nil,
  77. },
  78. }
  79. for _, c := range cases {
  80. t.Run("", func(t *testing.T) {
  81. modelfile, err := ParseFile(strings.NewReader(c.input))
  82. assert.ErrorIs(t, err, c.err)
  83. if modelfile != nil {
  84. assert.Equal(t, c.expected, modelfile.Commands)
  85. }
  86. })
  87. }
  88. }
  89. func TestParseFileParametersMissingValue(t *testing.T) {
  90. input := `
  91. FROM foo
  92. PARAMETER param1
  93. `
  94. reader := strings.NewReader(input)
  95. _, err := ParseFile(reader)
  96. assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
  97. }
  98. func TestParseFileBadCommand(t *testing.T) {
  99. input := `
  100. FROM foo
  101. BADCOMMAND param1 value1
  102. `
  103. _, err := ParseFile(strings.NewReader(input))
  104. assert.ErrorIs(t, err, errInvalidCommand)
  105. }
  106. func TestParseFileMessages(t *testing.T) {
  107. var cases = []struct {
  108. input string
  109. expected []Command
  110. err error
  111. }{
  112. {
  113. `
  114. FROM foo
  115. MESSAGE system You are a file parser. Always parse things.
  116. `,
  117. []Command{
  118. {Name: "model", Args: "foo"},
  119. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  120. },
  121. nil,
  122. },
  123. {
  124. `
  125. FROM foo
  126. MESSAGE system You are a file parser. Always parse things.`,
  127. []Command{
  128. {Name: "model", Args: "foo"},
  129. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  130. },
  131. nil,
  132. },
  133. {
  134. `
  135. FROM foo
  136. MESSAGE system You are a file parser. Always parse things.
  137. MESSAGE user Hey there!
  138. MESSAGE assistant Hello, I want to parse all the things!
  139. `,
  140. []Command{
  141. {Name: "model", Args: "foo"},
  142. {Name: "message", Args: "system: You are a file parser. Always parse things."},
  143. {Name: "message", Args: "user: Hey there!"},
  144. {Name: "message", Args: "assistant: Hello, I want to parse all the things!"},
  145. },
  146. nil,
  147. },
  148. {
  149. `
  150. FROM foo
  151. MESSAGE system """
  152. You are a multiline file parser. Always parse things.
  153. """
  154. `,
  155. []Command{
  156. {Name: "model", Args: "foo"},
  157. {Name: "message", Args: "system: \nYou are a multiline file parser. Always parse things.\n"},
  158. },
  159. nil,
  160. },
  161. {
  162. `
  163. FROM foo
  164. MESSAGE badguy I'm a bad guy!
  165. `,
  166. nil,
  167. errInvalidMessageRole,
  168. },
  169. {
  170. `
  171. FROM foo
  172. MESSAGE system
  173. `,
  174. nil,
  175. io.ErrUnexpectedEOF,
  176. },
  177. {
  178. `
  179. FROM foo
  180. MESSAGE system`,
  181. nil,
  182. io.ErrUnexpectedEOF,
  183. },
  184. }
  185. for _, c := range cases {
  186. t.Run("", func(t *testing.T) {
  187. modelfile, err := ParseFile(strings.NewReader(c.input))
  188. assert.ErrorIs(t, err, c.err)
  189. if modelfile != nil {
  190. assert.Equal(t, c.expected, modelfile.Commands)
  191. }
  192. })
  193. }
  194. }
  195. func TestParseFileQuoted(t *testing.T) {
  196. var cases = []struct {
  197. multiline string
  198. expected []Command
  199. err error
  200. }{
  201. {
  202. `
  203. FROM foo
  204. SYSTEM """
  205. This is a
  206. multiline system.
  207. """
  208. `,
  209. []Command{
  210. {Name: "model", Args: "foo"},
  211. {Name: "system", Args: "\nThis is a\nmultiline system.\n"},
  212. },
  213. nil,
  214. },
  215. {
  216. `
  217. FROM foo
  218. SYSTEM """
  219. This is a
  220. multiline system."""
  221. `,
  222. []Command{
  223. {Name: "model", Args: "foo"},
  224. {Name: "system", Args: "\nThis is a\nmultiline system."},
  225. },
  226. nil,
  227. },
  228. {
  229. `
  230. FROM foo
  231. SYSTEM """This is a
  232. multiline system."""
  233. `,
  234. []Command{
  235. {Name: "model", Args: "foo"},
  236. {Name: "system", Args: "This is a\nmultiline system."},
  237. },
  238. nil,
  239. },
  240. {
  241. `
  242. FROM foo
  243. SYSTEM """This is a multiline system."""
  244. `,
  245. []Command{
  246. {Name: "model", Args: "foo"},
  247. {Name: "system", Args: "This is a multiline system."},
  248. },
  249. nil,
  250. },
  251. {
  252. `
  253. FROM foo
  254. SYSTEM """This is a multiline system.""
  255. `,
  256. nil,
  257. io.ErrUnexpectedEOF,
  258. },
  259. {
  260. `
  261. FROM foo
  262. SYSTEM "
  263. `,
  264. nil,
  265. io.ErrUnexpectedEOF,
  266. },
  267. {
  268. `
  269. FROM foo
  270. SYSTEM """
  271. This is a multiline system with "quotes".
  272. """
  273. `,
  274. []Command{
  275. {Name: "model", Args: "foo"},
  276. {Name: "system", Args: "\nThis is a multiline system with \"quotes\".\n"},
  277. },
  278. nil,
  279. },
  280. {
  281. `
  282. FROM foo
  283. SYSTEM """"""
  284. `,
  285. []Command{
  286. {Name: "model", Args: "foo"},
  287. {Name: "system", Args: ""},
  288. },
  289. nil,
  290. },
  291. {
  292. `
  293. FROM foo
  294. SYSTEM ""
  295. `,
  296. []Command{
  297. {Name: "model", Args: "foo"},
  298. {Name: "system", Args: ""},
  299. },
  300. nil,
  301. },
  302. {
  303. `
  304. FROM foo
  305. SYSTEM "'"
  306. `,
  307. []Command{
  308. {Name: "model", Args: "foo"},
  309. {Name: "system", Args: "'"},
  310. },
  311. nil,
  312. },
  313. {
  314. `
  315. FROM foo
  316. SYSTEM """''"'""'""'"'''''""'""'"""
  317. `,
  318. []Command{
  319. {Name: "model", Args: "foo"},
  320. {Name: "system", Args: `''"'""'""'"'''''""'""'`},
  321. },
  322. nil,
  323. },
  324. {
  325. `
  326. FROM foo
  327. TEMPLATE """
  328. {{ .Prompt }}
  329. """`,
  330. []Command{
  331. {Name: "model", Args: "foo"},
  332. {Name: "template", Args: "\n{{ .Prompt }}\n"},
  333. },
  334. nil,
  335. },
  336. }
  337. for _, c := range cases {
  338. t.Run("", func(t *testing.T) {
  339. modelfile, err := ParseFile(strings.NewReader(c.multiline))
  340. assert.ErrorIs(t, err, c.err)
  341. if modelfile != nil {
  342. assert.Equal(t, c.expected, modelfile.Commands)
  343. }
  344. })
  345. }
  346. }
  347. func TestParseFileParameters(t *testing.T) {
  348. var cases = map[string]struct {
  349. name, value string
  350. }{
  351. "numa true": {"numa", "true"},
  352. "num_ctx 1": {"num_ctx", "1"},
  353. "num_batch 1": {"num_batch", "1"},
  354. "num_gqa 1": {"num_gqa", "1"},
  355. "num_gpu 1": {"num_gpu", "1"},
  356. "main_gpu 1": {"main_gpu", "1"},
  357. "low_vram true": {"low_vram", "true"},
  358. "f16_kv true": {"f16_kv", "true"},
  359. "logits_all true": {"logits_all", "true"},
  360. "vocab_only true": {"vocab_only", "true"},
  361. "use_mmap true": {"use_mmap", "true"},
  362. "use_mlock true": {"use_mlock", "true"},
  363. "num_thread 1": {"num_thread", "1"},
  364. "num_keep 1": {"num_keep", "1"},
  365. "seed 1": {"seed", "1"},
  366. "num_predict 1": {"num_predict", "1"},
  367. "top_k 1": {"top_k", "1"},
  368. "top_p 1.0": {"top_p", "1.0"},
  369. "tfs_z 1.0": {"tfs_z", "1.0"},
  370. "typical_p 1.0": {"typical_p", "1.0"},
  371. "repeat_last_n 1": {"repeat_last_n", "1"},
  372. "temperature 1.0": {"temperature", "1.0"},
  373. "repeat_penalty 1.0": {"repeat_penalty", "1.0"},
  374. "presence_penalty 1.0": {"presence_penalty", "1.0"},
  375. "frequency_penalty 1.0": {"frequency_penalty", "1.0"},
  376. "mirostat 1": {"mirostat", "1"},
  377. "mirostat_tau 1.0": {"mirostat_tau", "1.0"},
  378. "mirostat_eta 1.0": {"mirostat_eta", "1.0"},
  379. "penalize_newline true": {"penalize_newline", "true"},
  380. "stop ### User:": {"stop", "### User:"},
  381. "stop ### User: ": {"stop", "### User: "},
  382. "stop \"### User:\"": {"stop", "### User:"},
  383. "stop \"### User: \"": {"stop", "### User: "},
  384. "stop \"\"\"### User:\"\"\"": {"stop", "### User:"},
  385. "stop \"\"\"### User:\n\"\"\"": {"stop", "### User:\n"},
  386. "stop <|endoftext|>": {"stop", "<|endoftext|>"},
  387. "stop <|eot_id|>": {"stop", "<|eot_id|>"},
  388. "stop </s>": {"stop", "</s>"},
  389. }
  390. for k, v := range cases {
  391. t.Run(k, func(t *testing.T) {
  392. var b bytes.Buffer
  393. fmt.Fprintln(&b, "FROM foo")
  394. fmt.Fprintln(&b, "PARAMETER", k)
  395. modelfile, err := ParseFile(&b)
  396. assert.NoError(t, err)
  397. assert.Equal(t, []Command{
  398. {Name: "model", Args: "foo"},
  399. {Name: v.name, Args: v.value},
  400. }, modelfile.Commands)
  401. })
  402. }
  403. }
  404. func TestParseFileComments(t *testing.T) {
  405. var cases = []struct {
  406. input string
  407. expected []Command
  408. }{
  409. {
  410. `
  411. # comment
  412. FROM foo
  413. `,
  414. []Command{
  415. {Name: "model", Args: "foo"},
  416. },
  417. },
  418. }
  419. for _, c := range cases {
  420. t.Run("", func(t *testing.T) {
  421. modelfile, err := ParseFile(strings.NewReader(c.input))
  422. assert.NoError(t, err)
  423. assert.Equal(t, c.expected, modelfile.Commands)
  424. })
  425. }
  426. }
  427. func TestParseFileFormatParseFile(t *testing.T) {
  428. var cases = []string{
  429. `
  430. FROM foo
  431. ADAPTER adapter1
  432. LICENSE MIT
  433. PARAMETER param1 value1
  434. PARAMETER param2 value2
  435. TEMPLATE template1
  436. MESSAGE system You are a file parser. Always parse things.
  437. MESSAGE user Hey there!
  438. MESSAGE assistant Hello, I want to parse all the things!
  439. `,
  440. `
  441. FROM foo
  442. ADAPTER adapter1
  443. LICENSE MIT
  444. PARAMETER param1 value1
  445. PARAMETER param2 value2
  446. TEMPLATE template1
  447. MESSAGE system """
  448. You are a store greeter. Always responsed with "Hello!".
  449. """
  450. MESSAGE user Hey there!
  451. MESSAGE assistant Hello, I want to parse all the things!
  452. `,
  453. `
  454. FROM foo
  455. ADAPTER adapter1
  456. LICENSE """
  457. Very long and boring legal text.
  458. Blah blah blah.
  459. "Oh look, a quote!"
  460. """
  461. PARAMETER param1 value1
  462. PARAMETER param2 value2
  463. TEMPLATE template1
  464. MESSAGE system """
  465. You are a store greeter. Always responsed with "Hello!".
  466. """
  467. MESSAGE user Hey there!
  468. MESSAGE assistant Hello, I want to parse all the things!
  469. `,
  470. `
  471. FROM foo
  472. SYSTEM ""
  473. `,
  474. }
  475. for _, c := range cases {
  476. t.Run("", func(t *testing.T) {
  477. modelfile, err := ParseFile(strings.NewReader(c))
  478. assert.NoError(t, err)
  479. modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
  480. assert.NoError(t, err)
  481. assert.Equal(t, modelfile, modelfile2)
  482. })
  483. }
  484. }
  485. func TestParseFileUTF16ParseFile(t *testing.T) {
  486. data := `FROM bob
  487. PARAMETER param1 1
  488. PARAMETER param2 4096
  489. SYSTEM You are a utf16 file.
  490. `
  491. // simulate a utf16 le file
  492. utf16File := utf16.Encode(append([]rune{'\ufffe'}, []rune(data)...))
  493. buf := new(bytes.Buffer)
  494. err := binary.Write(buf, binary.LittleEndian, utf16File)
  495. assert.NoError(t, err)
  496. actual, err := ParseFile(buf)
  497. assert.NoError(t, err)
  498. expected := []Command{
  499. {Name: "model", Args: "bob"},
  500. {Name: "param1", Args: "1"},
  501. {Name: "param2", Args: "4096"},
  502. {Name: "system", Args: "You are a utf16 file."},
  503. }
  504. assert.Equal(t, expected, actual.Commands)
  505. // simulate a utf16 be file
  506. buf = new(bytes.Buffer)
  507. err = binary.Write(buf, binary.BigEndian, utf16File)
  508. assert.NoError(t, err)
  509. actual, err = ParseFile(buf)
  510. assert.NoError(t, err)
  511. assert.Equal(t, expected, actual.Commands)
  512. }