gguf.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "log/slog"
  9. )
  10. type containerGGUF struct {
  11. ByteOrder binary.ByteOrder
  12. Version uint32
  13. V1 struct {
  14. NumTensor uint32
  15. NumKV uint32
  16. }
  17. V2 struct {
  18. NumTensor uint64
  19. NumKV uint64
  20. }
  21. V3 struct {
  22. NumTensor uint64
  23. NumKV uint64
  24. }
  25. }
  26. func (c *containerGGUF) Name() string {
  27. return "gguf"
  28. }
  29. func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
  30. if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
  31. return nil, err
  32. }
  33. var err error
  34. switch c.Version {
  35. case 1:
  36. err = binary.Read(rs, c.ByteOrder, &c.V1)
  37. case 2:
  38. err = binary.Read(rs, c.ByteOrder, &c.V2)
  39. default:
  40. err = binary.Read(rs, c.ByteOrder, &c.V3)
  41. }
  42. if err != nil {
  43. return nil, err
  44. }
  45. model := newGGUF(c)
  46. slog.Debug(fmt.Sprintf("model = %#v", model))
  47. if err := model.Decode(rs); err != nil {
  48. return nil, err
  49. }
  50. return model, nil
  51. }
  52. const (
  53. ggufTypeUint8 uint32 = iota
  54. ggufTypeInt8
  55. ggufTypeUint16
  56. ggufTypeInt16
  57. ggufTypeUint32
  58. ggufTypeInt32
  59. ggufTypeFloat32
  60. ggufTypeBool
  61. ggufTypeString
  62. ggufTypeArray
  63. ggufTypeUint64
  64. ggufTypeInt64
  65. ggufTypeFloat64
  66. )
  67. type gguf struct {
  68. *containerGGUF
  69. kv KV
  70. tensors []*Tensor
  71. parameters uint64
  72. }
  73. func newGGUF(container *containerGGUF) *gguf {
  74. return &gguf{
  75. containerGGUF: container,
  76. kv: make(KV),
  77. }
  78. }
  79. func NewGGUFV3(bo binary.ByteOrder) *gguf {
  80. return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
  81. }
  82. func (llm *gguf) KV() KV {
  83. return llm.kv
  84. }
  85. func (llm *gguf) Tensors() Tensors {
  86. return llm.tensors
  87. }
  88. func (llm *gguf) numTensor() uint64 {
  89. switch llm.Version {
  90. case 1:
  91. return uint64(llm.V1.NumTensor)
  92. case 2:
  93. return llm.V2.NumTensor
  94. default:
  95. return llm.V3.NumTensor
  96. }
  97. }
  98. func (llm *gguf) numKV() uint64 {
  99. switch llm.Version {
  100. case 1:
  101. return uint64(llm.V1.NumKV)
  102. case 2:
  103. return llm.V2.NumKV
  104. default:
  105. return llm.V3.NumKV
  106. }
  107. }
  108. func (llm *gguf) Decode(rs io.ReadSeeker) error {
  109. // decode key-values
  110. for i := 0; uint64(i) < llm.numKV(); i++ {
  111. k, err := readGGUFString(llm, rs)
  112. if err != nil {
  113. return err
  114. }
  115. t, err := readGGUF[uint32](llm, rs)
  116. if err != nil {
  117. return err
  118. }
  119. var v any
  120. switch t {
  121. case ggufTypeUint8:
  122. v, err = readGGUF[uint8](llm, rs)
  123. case ggufTypeInt8:
  124. v, err = readGGUF[int8](llm, rs)
  125. case ggufTypeUint16:
  126. v, err = readGGUF[uint16](llm, rs)
  127. case ggufTypeInt16:
  128. v, err = readGGUF[int16](llm, rs)
  129. case ggufTypeUint32:
  130. v, err = readGGUF[uint32](llm, rs)
  131. case ggufTypeInt32:
  132. v, err = readGGUF[int32](llm, rs)
  133. case ggufTypeUint64:
  134. v, err = readGGUF[uint64](llm, rs)
  135. case ggufTypeInt64:
  136. v, err = readGGUF[int64](llm, rs)
  137. case ggufTypeFloat32:
  138. v, err = readGGUF[float32](llm, rs)
  139. case ggufTypeFloat64:
  140. v, err = readGGUF[float64](llm, rs)
  141. case ggufTypeBool:
  142. v, err = readGGUF[bool](llm, rs)
  143. case ggufTypeString:
  144. v, err = readGGUFString(llm, rs)
  145. case ggufTypeArray:
  146. v, err = readGGUFArray(llm, rs)
  147. default:
  148. return fmt.Errorf("invalid type: %d", t)
  149. }
  150. if err != nil {
  151. return err
  152. }
  153. llm.kv[k] = v
  154. }
  155. // decode tensors
  156. for i := 0; uint64(i) < llm.numTensor(); i++ {
  157. name, err := readGGUFString(llm, rs)
  158. if err != nil {
  159. return err
  160. }
  161. // dims is the number of dimensions in the tensor
  162. dims, err := readGGUF[uint32](llm, rs)
  163. if err != nil {
  164. return err
  165. }
  166. shape := [4]uint64{1, 1, 1, 1}
  167. for i := 0; uint32(i) < dims; i++ {
  168. shape[i], err = readGGUF[uint64](llm, rs)
  169. if err != nil {
  170. return err
  171. }
  172. }
  173. kind, err := readGGUF[uint32](llm, rs)
  174. if err != nil {
  175. return err
  176. }
  177. offset, err := readGGUF[uint64](llm, rs)
  178. if err != nil {
  179. return err
  180. }
  181. tensor := Tensor{
  182. Name: name,
  183. Kind: kind,
  184. Offset: offset,
  185. Shape: shape[:],
  186. }
  187. llm.tensors = append(llm.tensors, &tensor)
  188. llm.parameters += tensor.parameters()
  189. }
  190. // patch KV with parameter count
  191. llm.kv["general.parameter_count"] = llm.parameters
  192. alignment, ok := llm.kv["general.alignment"].(uint32)
  193. if !ok {
  194. alignment = 32
  195. }
  196. offset, err := rs.Seek(0, io.SeekCurrent)
  197. if err != nil {
  198. return err
  199. }
  200. padding := llm.padding(offset, int64(alignment))
  201. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  202. return err
  203. }
  204. for _, tensor := range llm.tensors {
  205. if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
  206. return err
  207. }
  208. padding := llm.padding(int64(tensor.Size()), int64(alignment))
  209. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  210. return err
  211. }
  212. }
  213. return nil
  214. }
  215. func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
  216. var t T
  217. err := binary.Read(r, llm.ByteOrder, &t)
  218. return t, err
  219. }
  220. func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
  221. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  222. return err
  223. }
  224. return binary.Write(w, llm.ByteOrder, v)
  225. }
  226. func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
  227. var length uint64
  228. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  229. return "", err
  230. }
  231. var b bytes.Buffer
  232. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  233. return "", err
  234. }
  235. // gguf v1 strings are null-terminated
  236. b.Truncate(b.Len() - 1)
  237. return b.String(), nil
  238. }
  239. func readGGUFString(llm *gguf, r io.Reader) (string, error) {
  240. if llm.Version == 1 {
  241. return readGGUFV1String(llm, r)
  242. }
  243. var length uint64
  244. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  245. return "", err
  246. }
  247. var b bytes.Buffer
  248. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  249. return "", err
  250. }
  251. return b.String(), nil
  252. }
  253. func writeGGUFString(llm *gguf, w io.Writer, s string) error {
  254. if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
  255. return err
  256. }
  257. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  258. return err
  259. }
  260. _, err := io.Copy(w, strings.NewReader(s))
  261. return err
  262. }
  263. func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
  264. t, err := readGGUF[uint32](llm, r)
  265. if err != nil {
  266. return nil, err
  267. }
  268. n, err := readGGUF[uint32](llm, r)
  269. if err != nil {
  270. return nil, err
  271. }
  272. for i := 0; uint32(i) < n; i++ {
  273. var e any
  274. switch t {
  275. case ggufTypeUint8:
  276. e, err = readGGUF[uint8](llm, r)
  277. case ggufTypeInt8:
  278. e, err = readGGUF[int8](llm, r)
  279. case ggufTypeUint16:
  280. e, err = readGGUF[uint16](llm, r)
  281. case ggufTypeInt16:
  282. e, err = readGGUF[int16](llm, r)
  283. case ggufTypeUint32:
  284. e, err = readGGUF[uint32](llm, r)
  285. case ggufTypeInt32:
  286. e, err = readGGUF[int32](llm, r)
  287. case ggufTypeUint64:
  288. e, err = readGGUF[uint64](llm, r)
  289. case ggufTypeInt64:
  290. e, err = readGGUF[int64](llm, r)
  291. case ggufTypeFloat32:
  292. e, err = readGGUF[float32](llm, r)
  293. case ggufTypeFloat64:
  294. e, err = readGGUF[float64](llm, r)
  295. case ggufTypeBool:
  296. e, err = readGGUF[bool](llm, r)
  297. case ggufTypeString:
  298. e, err = readGGUFV1String(llm, r)
  299. default:
  300. return nil, fmt.Errorf("invalid array type: %d", t)
  301. }
  302. if err != nil {
  303. return nil, err
  304. }
  305. a = append(a, e)
  306. }
  307. return
  308. }
  309. func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
  310. if llm.Version == 1 {
  311. return readGGUFV1Array(llm, r)
  312. }
  313. t, err := readGGUF[uint32](llm, r)
  314. if err != nil {
  315. return nil, err
  316. }
  317. n, err := readGGUF[uint64](llm, r)
  318. if err != nil {
  319. return nil, err
  320. }
  321. for i := 0; uint64(i) < n; i++ {
  322. var e any
  323. switch t {
  324. case ggufTypeUint8:
  325. e, err = readGGUF[uint8](llm, r)
  326. case ggufTypeInt8:
  327. e, err = readGGUF[int8](llm, r)
  328. case ggufTypeUint16:
  329. e, err = readGGUF[uint16](llm, r)
  330. case ggufTypeInt16:
  331. e, err = readGGUF[int16](llm, r)
  332. case ggufTypeUint32:
  333. e, err = readGGUF[uint32](llm, r)
  334. case ggufTypeInt32:
  335. e, err = readGGUF[int32](llm, r)
  336. case ggufTypeUint64:
  337. e, err = readGGUF[uint64](llm, r)
  338. case ggufTypeInt64:
  339. e, err = readGGUF[int64](llm, r)
  340. case ggufTypeFloat32:
  341. e, err = readGGUF[float32](llm, r)
  342. case ggufTypeFloat64:
  343. e, err = readGGUF[float64](llm, r)
  344. case ggufTypeBool:
  345. e, err = readGGUF[bool](llm, r)
  346. case ggufTypeString:
  347. e, err = readGGUFString(llm, r)
  348. default:
  349. return nil, fmt.Errorf("invalid array type: %d", t)
  350. }
  351. if err != nil {
  352. return nil, err
  353. }
  354. a = append(a, e)
  355. }
  356. return
  357. }
  358. func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
  359. if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
  360. return err
  361. }
  362. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  363. return err
  364. }
  365. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  366. return err
  367. }
  368. for _, e := range s {
  369. if err := binary.Write(w, llm.ByteOrder, e); err != nil {
  370. return err
  371. }
  372. }
  373. return nil
  374. }
  375. var ggufKVOrder = map[string][]string{
  376. "llama": {
  377. "general.architecture",
  378. "general.name",
  379. "llama.vocab_size",
  380. "llama.context_length",
  381. "llama.embedding_length",
  382. "llama.block_count",
  383. "llama.feed_forward_length",
  384. "llama.attention.head_count",
  385. "llama.attention.head_count_kv",
  386. "llama.attention.layer_norm_rms_epsilon",
  387. "llama.rope.freq_base",
  388. "llama.rope.dimension_count",
  389. "llama.expert_count",
  390. "llama.expert_used_count",
  391. "gemma.context_length",
  392. "gemma.embedding_length",
  393. "gemma.block_count",
  394. "gemma.feed_forward_length",
  395. "gemma.attention.head_count",
  396. "gemma.attention.head_count_kv",
  397. "gemma.attention.layer_norm_rms_epsilon",
  398. "gemma.attention.key_length",
  399. "gemma.attention.value_length",
  400. "general.file_type",
  401. "tokenizer.ggml.pre",
  402. "tokenizer.ggml.model",
  403. "tokenizer.ggml.tokens",
  404. "tokenizer.ggml.scores",
  405. "tokenizer.ggml.merges",
  406. "tokenizer.ggml.token_type",
  407. "tokenizer.ggml.bos_token_id",
  408. "tokenizer.ggml.eos_token_id",
  409. "tokenizer.ggml.unknown_token_id",
  410. "tokenizer.ggml.padding_token_id",
  411. "tokenizer.ggml.add_bos_token",
  412. "tokenizer.ggml.add_eos_token",
  413. "tokenizer.chat_template",
  414. },
  415. }
  416. func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
  417. switch llm.Version {
  418. case 3:
  419. llm.V3.NumTensor = uint64(len(tensors))
  420. llm.V3.NumKV = uint64(len(kv))
  421. default:
  422. return fmt.Errorf("not implemented: ggufv%d", llm.Version)
  423. }
  424. if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
  425. return err
  426. }
  427. if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
  428. return err
  429. }
  430. if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
  431. return err
  432. }
  433. if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
  434. return err
  435. }
  436. kvCheck := make(map[string]bool)
  437. for k := range kv {
  438. kvCheck[k] = false
  439. }
  440. for _, k := range ggufKVOrder["llama"] {
  441. v, ok := kv[k]
  442. if !ok {
  443. continue
  444. }
  445. kvCheck[k] = true
  446. if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
  447. return err
  448. }
  449. if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
  450. return err
  451. }
  452. var err error
  453. switch v := v.(type) {
  454. case uint32:
  455. err = writeGGUF(llm, ws, ggufTypeUint32, v)
  456. case float32:
  457. err = writeGGUF(llm, ws, ggufTypeFloat32, v)
  458. case bool:
  459. err = writeGGUF(llm, ws, ggufTypeBool, v)
  460. case string:
  461. err = writeGGUFString(llm, ws, v)
  462. case []int32:
  463. err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
  464. case []uint32:
  465. err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
  466. case []float32:
  467. err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
  468. case []string:
  469. if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
  470. return err
  471. }
  472. if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
  473. return err
  474. }
  475. if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
  476. return err
  477. }
  478. for _, e := range v {
  479. if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
  480. return err
  481. }
  482. if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
  483. return err
  484. }
  485. }
  486. default:
  487. return fmt.Errorf("improper type for '%s'", k)
  488. }
  489. if err != nil {
  490. return err
  491. }
  492. }
  493. for k, v := range kvCheck {
  494. if !v {
  495. return fmt.Errorf("Didn't know how to write kv %s", k)
  496. }
  497. }
  498. for _, tensor := range tensors {
  499. if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
  500. return err
  501. }
  502. if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
  503. return err
  504. }
  505. dims := 0
  506. for cnt := 0; cnt < len(tensor.Shape); cnt++ {
  507. if tensor.Shape[cnt] > 0 {
  508. dims++
  509. }
  510. }
  511. if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
  512. return err
  513. }
  514. for i := 0; i < dims; i++ {
  515. if err := binary.Write(ws, llm.ByteOrder, uint64(tensor.Shape[dims-1-i])); err != nil {
  516. return err
  517. }
  518. }
  519. if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
  520. return err
  521. }
  522. if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
  523. return err
  524. }
  525. }
  526. offset, err := ws.Seek(0, io.SeekCurrent)
  527. if err != nil {
  528. return err
  529. }
  530. var alignment int64 = 32
  531. padding := llm.padding(offset, alignment)
  532. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
  533. return err
  534. }
  535. for _, tensor := range tensors {
  536. if _, err := tensor.WriteTo(ws); err != nil {
  537. return err
  538. }
  539. offset, err := ws.Seek(0, io.SeekCurrent)
  540. if err != nil {
  541. return err
  542. }
  543. padding := llm.padding(offset, alignment)
  544. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
  545. return err
  546. }
  547. }
  548. return nil
  549. }
  550. func (gguf) padding(offset, align int64) int64 {
  551. return (align - offset%align) % align
  552. }