convert_test.go

package convert

import (
    "bytes"
    "crypto/sha256"
    "encoding/binary"
    "encoding/hex"
    "encoding/json"
    "flag"
    "fmt"
    "io"
    "io/fs"
    "log/slog"
    "math"
    "os"
    "path/filepath"
    "slices"
    "strings"
    "testing"

    "golang.org/x/exp/maps"

    "github.com/ollama/ollama/llm"
)
type tensorData struct {
    Offsets []int  `json:"data_offsets"`
    Type    string `json:"dtype"`
    Shape   []int  `json:"shape"`
}
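
// convertFull runs ConvertModel on fsys, decodes the resulting GGML file,
// and returns the reopened output file along with its KV metadata and tensors.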
func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) {
    t.Helper()

    f, err := os.CreateTemp(t.TempDir(), "f16")
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    if err := ConvertModel(fsys, f); err != nil {
        t.Fatal(err)
    }

    r, err := os.Open(f.Name())
    if err != nil {
        t.Fatal(err)
    }
    t.Cleanup(func() { r.Close() })

    m, _, err := llm.DecodeGGML(r, math.MaxInt)
    if err != nil {
        t.Fatal(err)
    }

    if _, err := r.Seek(0, io.SeekStart); err != nil {
        t.Fatal(err)
    }

    return r, m.KV(), m.Tensors()
}
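
// generateResultsJSON flattens the KV metadata and tensor contents into a map
// of comparable strings: plain values are formatted with %v, while marshalable
// values and tensor payloads are reduced to SHA-256 digests.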
func generateResultsJSON(t *testing.T, f *os.File, kv llm.KV, tensors llm.Tensors) map[string]string {
    actual := make(map[string]string)
    for k, v := range kv {
        if s, ok := v.(json.Marshaler); !ok {
            actual[k] = fmt.Sprintf("%v", v)
        } else {
            bts, err := json.Marshal(s)
            if err != nil {
                t.Fatal(err)
            }

            actual[k] = fmt.Sprintf("%x", sha256.Sum256(bts))
        }
    }

    for _, tensor := range tensors.Items {
        sha256sum := sha256.New()
        sr := io.NewSectionReader(f, int64(tensors.Offset+tensor.Offset), int64(tensor.Size()))
        if _, err := io.Copy(sha256sum, sr); err != nil {
            t.Fatal(err)
        }

        actual[tensor.Name] = hex.EncodeToString(sha256sum.Sum(nil))
    }

    return actual
}
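
// TestMain wires a -level flag to the default slog logger so test runs can
// adjust log verbosity.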
func TestMain(m *testing.M) {
    var level slog.Level
    flag.TextVar(&level, "level", slog.LevelInfo, "log level")
    flag.Parse()
    slog.SetLogLoggerLevel(level)
    os.Exit(m.Run())
}
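
// TestConvertModel converts each model checkpoint under testdata and compares
// the resulting metadata and tensor digests against the checked-in
// <model>.json expectations. Missing checkpoints are skipped.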
func TestConvertModel(t *testing.T) {
    cases := []string{
        "Meta-Llama-3-8B-Instruct",
        "Meta-Llama-3.1-8B-Instruct",
        "Mistral-7B-Instruct-v0.2",
        "Mixtral-8x7B-Instruct-v0.1",
        "gemma-2b-it",
        "gemma-2-2b-it",
        // microsoft/Phi-3-mini-128k-instruct@d548c233192db00165d842bf8edff054bb3212f8
        "Phi-3-mini-128k-instruct",
        "all-MiniLM-L6-v2",
        "gemma-2-9b-it",
    }

    for i := range cases {
        tt := cases[i]
        t.Run(tt, func(t *testing.T) {
            t.Parallel()

            p := filepath.Join("testdata", tt)
            if testing.Short() {
                t.Skip("skipping in short mode")
            } else if _, err := os.Stat(p); err != nil {
                t.Skipf("%s not found", p)
            }

            f, kv, tensors := convertFull(t, os.DirFS(p))
            actual := generateResultsJSON(t, f, kv, tensors)

            expectFile, err := os.Open(filepath.Join("testdata", fmt.Sprintf("%s.json", tt)))
            if err != nil {
                t.Fatal(err)
            }

            var expect map[string]string
            if err := json.NewDecoder(expectFile).Decode(&expect); err != nil {
                t.Fatal(err)
            }

            keys := maps.Keys(expect)
            slices.Sort(keys)
            for _, k := range keys {
                if v, ok := actual[k]; !ok {
                    t.Errorf("missing %s", k)
                } else if v != expect[k] {
                    t.Errorf("unexpected %s: want %s, got %s", k, expect[k], v)
                }
            }
        })
    }
}
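
// TestConvertInvalidTensorNames verifies that ConvertModel rejects safetensors
// input whose tensor names collide after renaming, surfacing a
// "duplicate tensor name" error.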
func TestConvertInvalidTensorNames(t *testing.T) {
    f, err := os.CreateTemp(t.TempDir(), "testmodel")
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    tempDir := t.TempDir()

    td := map[string]*tensorData{}
    offset := 4096

    td["model.layers.0.self_attn.q_proj.weight"] = &tensorData{
        Offsets: []int{0, offset},
        Type:    "F32",
        Shape:   []int{4096, 4096},
    }
    td["blk.0.attn_q.weight"] = &tensorData{
        Offsets: []int{offset, offset * 2},
        Type:    "F32",
        Shape:   []int{4096, 4096},
    }
    generateSafetensorTestData(t, tempDir, td)

    err = ConvertModel(os.DirFS(tempDir), f)
    if err == nil || !strings.HasPrefix(err.Error(), "duplicate tensor name") {
        t.Errorf("expected duplicate tensor name error, got %v", err)
    }
}
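
// TestConvertInvalidDatatype verifies that ConvertModel rejects safetensors
// input containing an unsupported dtype (here, quantized I8 weights).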
func TestConvertInvalidDatatype(t *testing.T) {
    f, err := os.CreateTemp(t.TempDir(), "testmodel")
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    tempDir := t.TempDir()

    td := map[string]*tensorData{}
    offset := 4096 * 14336

    td["model.layers.0.mlp.down_proj.weight"] = &tensorData{
        Offsets: []int{0, offset},
        Type:    "I8",
        Shape:   []int{4096, 14336},
    }
    td["model.layers.0.mlp.down_proj.weight_format"] = &tensorData{
        Offsets: []int{offset, offset},
        Type:    "U8",
        Shape:   []int{},
    }
    generateSafetensorTestData(t, tempDir, td)

    err = ConvertModel(os.DirFS(tempDir), f)
    if err == nil || err.Error() != "unsupported safetensors model" {
        t.Errorf("expected unsupported safetensors model error, got %v", err)
    }
}
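
// generateSafetensorTestData writes a minimal safetensors file (length-prefixed
// JSON header only, no tensor payload) plus stub config.json and tokenizer.json
// files into tempDir.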
func generateSafetensorTestData(t *testing.T, tempDir string, tensorData map[string]*tensorData) {
    data, err := json.Marshal(tensorData)
    if err != nil {
        t.Fatal(err)
    }

    var buf bytes.Buffer

    l := int64(len(data))
    err = binary.Write(&buf, binary.LittleEndian, l)
    if err != nil {
        t.Fatal(err)
    }

    _, err = buf.Write(data)
    if err != nil {
        t.Fatal(err)
    }

    fdata, err := os.Create(filepath.Join(tempDir, "model-00001-of-00001.safetensors"))
    if err != nil {
        t.Fatal(err)
    }
    defer fdata.Close()

    _, err = fdata.Write(buf.Bytes())
    if err != nil {
        t.Fatal(err)
    }

    configData := `
{
  "architectures": [
    "LlamaForCausalLM"
  ]
}
`

    f, err := os.Create(filepath.Join(tempDir, "config.json"))
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    _, err = f.WriteString(configData)
    if err != nil {
        t.Fatal(err)
    }

    tokenizerData := `
{
}
`

    f, err = os.Create(filepath.Join(tempDir, "tokenizer.json"))
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    _, err = f.WriteString(tokenizerData)
    if err != nil {
        t.Fatal(err)
    }
}
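
// TestConvertAdapter converts a synthetic LoRA adapter and checks the resulting
// GGUF metadata and tensor digests against the expected values.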
func TestConvertAdapter(t *testing.T) {
    type AdapterCase struct {
        Name     string
        BaseKV   map[string]any
        Expected map[string]string
    }

    cases := []AdapterCase{
        {
            Name: "discollama",
            BaseKV: map[string]any{
                "general.architecture":          "llama",
                "llama.attention.head_count":    uint32(32),
                "llama.attention.head_count_kv": uint32(8),
            },
            Expected: map[string]string{
                "general.architecture":          "llama",
                "general.file_type":             "1",
                "general.parameter_count":       "106496",
                "general.type":                  "adapter",
                "general.version":               "v0.2",
                "adapter.lora.alpha":            "16",
                "adapter.type":                  "lora",
                "llama.attention.head_count":    "32",
                "llama.attention.head_count_kv": "8",
                "blk.31.attn_q.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
                "blk.31.attn_q.weight.lora_b":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
                "blk.31.attn_v.weight.lora_a":   "0eb3318b02cd313429bcc7621b539fdbb10240fea190c56c9e5f93fcd37a4e50",
                "blk.31.attn_v.weight.lora_b":   "071dcafe89df065d6e1c935ecb8fdf6479b3c202eb912e7da938597673ff5857",
            },
        },
    }

    for _, c := range cases {
        t.Run(c.Name, func(t *testing.T) {
            t.Parallel()

            f, err := os.CreateTemp(t.TempDir(), "f16")
            if err != nil {
                t.Fatal(err)
            }
            defer f.Close()

            tempDir := t.TempDir()
            generateLoraTestData(t, tempDir)

            if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil {
                t.Fatal(err)
            }

            r, err := os.Open(f.Name())
            if err != nil {
                t.Fatal(err)
            }
            defer r.Close()

            m, _, err := llm.DecodeGGML(r, math.MaxInt)
            if err != nil {
                t.Fatal(err)
            }

            if _, err := r.Seek(0, io.SeekStart); err != nil {
                t.Fatal(err)
            }

            actual := generateResultsJSON(t, r, m.KV(), m.Tensors())

            keys := maps.Keys(c.Expected)
            slices.Sort(keys)
            for _, k := range keys {
                if v, ok := actual[k]; !ok {
                    t.Errorf("missing %s", k)
                } else if v != c.Expected[k] {
                    t.Errorf("unexpected %s: want %s, got %s", k, c.Expected[k], v)
                }
            }
        })
    }
}
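
// generateLoraTestData writes a small adapters.safetensors file containing LoRA
// A/B tensors filled with ones, plus a matching adapter_config.json, into tempDir.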
func generateLoraTestData(t *testing.T, tempDir string) {
    offset := 4096 * 8 * 4

    td := map[string]*tensorData{"__metadata__": nil}
    td["model.layers.31.self_attn.q_proj.lora_a"] = &tensorData{
        Offsets: []int{0, offset},
        Type:    "F32",
        Shape:   []int{4096, 8},
    }
    td["model.layers.31.self_attn.q_proj.lora_b"] = &tensorData{
        Offsets: []int{offset, offset * 2},
        Type:    "F32",
        Shape:   []int{8, 4096},
    }
    td["model.layers.31.self_attn.v_proj.lora_a"] = &tensorData{
        Offsets: []int{offset * 2, offset * 3},
        Type:    "F32",
        Shape:   []int{4096, 8},
    }
    td["model.layers.31.self_attn.v_proj.lora_b"] = &tensorData{
        Offsets: []int{offset * 3, offset*3 + 8*1024*4},
        Type:    "F32",
        Shape:   []int{8, 1024},
    }

    data, err := json.Marshal(td)
    if err != nil {
        t.Fatal(err)
    }

    var buf bytes.Buffer

    l := int64(len(data))
    err = binary.Write(&buf, binary.LittleEndian, l)
    if err != nil {
        t.Fatal(err)
    }

    _, err = buf.Write(data)
    if err != nil {
        t.Fatal(err)
    }

    // write some data for the tensors
    ones := make([]float32, 4096*8)
    for i := range ones {
        ones[i] = float32(1)
    }

    for range 3 {
        err = binary.Write(&buf, binary.LittleEndian, ones)
        if err != nil {
            t.Fatal(err)
        }
    }

    ones = make([]float32, 1024*8)
    for i := range ones {
        ones[i] = float32(1)
    }

    err = binary.Write(&buf, binary.LittleEndian, ones)
    if err != nil {
        t.Fatal(err)
    }

    fdata, err := os.Create(filepath.Join(tempDir, "adapters.safetensors"))
    if err != nil {
        t.Fatal(err)
    }
    defer fdata.Close()

    _, err = fdata.Write(buf.Bytes())
    if err != nil {
        t.Fatal(err)
    }

    configData := `
{
    "adapter_path": "adapters-test",
    "batch_size": 8,
    "config": "config-tiny.json",
    "data": "../discollama-completion",
    "grad_checkpoint": null,
    "iters": 1000,
    "learning_rate": 1e-05,
    "lora_layers": 1,
    "lora_parameters": {
        "rank": 8,
        "alpha": 16,
        "dropout": 0.0,
        "scale": 2.0
    },
    "lr_schedule": null,
    "max_seq_length": 2048,
    "model": "/Users/pdevine/git/Meta-Llama-3-8B-Instruct",
    "resume_adapter_file": null,
    "save_every": 100,
    "seed": 0,
    "steps_per_eval": 200,
    "steps_per_report": 10,
    "test": false,
    "test_batches": 500,
    "train": true,
    "use_dora": false,
    "val_batches": 25
}
`

    f, err := os.Create(filepath.Join(tempDir, "adapter_config.json"))
    if err != nil {
        t.Fatal(err)
    }
    defer f.Close()

    _, err = f.WriteString(configData)
    if err != nil {
        t.Fatal(err)
    }
}