gguf.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "log/slog"
  9. )
  10. type containerGGUF struct {
  11. ByteOrder binary.ByteOrder
  12. Version uint32
  13. V1 struct {
  14. NumTensor uint32
  15. NumKV uint32
  16. }
  17. V2 struct {
  18. NumTensor uint64
  19. NumKV uint64
  20. }
  21. V3 struct {
  22. NumTensor uint64
  23. NumKV uint64
  24. }
  25. }
  26. func (c *containerGGUF) Name() string {
  27. return "gguf"
  28. }
  29. func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
  30. var version [4]byte
  31. if err := binary.Read(rs, c.ByteOrder, &version); err != nil {
  32. return nil, err
  33. }
  34. // if the lower 16 bits are 0, the byte order is probably wrong
  35. if c.ByteOrder.Uint32(version[:])&1<<4 == 0 {
  36. switch c.ByteOrder {
  37. case binary.LittleEndian:
  38. c.ByteOrder = binary.BigEndian
  39. case binary.BigEndian:
  40. c.ByteOrder = binary.LittleEndian
  41. }
  42. }
  43. c.Version = c.ByteOrder.Uint32(version[:])
  44. var err error
  45. switch c.Version {
  46. case 1:
  47. err = binary.Read(rs, c.ByteOrder, &c.V1)
  48. case 2:
  49. err = binary.Read(rs, c.ByteOrder, &c.V2)
  50. default:
  51. err = binary.Read(rs, c.ByteOrder, &c.V3)
  52. }
  53. if err != nil {
  54. return nil, err
  55. }
  56. model := newGGUF(c)
  57. slog.Debug(fmt.Sprintf("model = %#v", model))
  58. if err := model.Decode(rs); err != nil {
  59. return nil, err
  60. }
  61. return model, nil
  62. }
  63. const (
  64. ggufTypeUint8 uint32 = iota
  65. ggufTypeInt8
  66. ggufTypeUint16
  67. ggufTypeInt16
  68. ggufTypeUint32
  69. ggufTypeInt32
  70. ggufTypeFloat32
  71. ggufTypeBool
  72. ggufTypeString
  73. ggufTypeArray
  74. ggufTypeUint64
  75. ggufTypeInt64
  76. ggufTypeFloat64
  77. )
  78. type gguf struct {
  79. *containerGGUF
  80. kv KV
  81. tensors []*Tensor
  82. parameters uint64
  83. }
  84. func newGGUF(container *containerGGUF) *gguf {
  85. return &gguf{
  86. containerGGUF: container,
  87. kv: make(KV),
  88. }
  89. }
  90. func NewGGUFV3(bo binary.ByteOrder) *gguf {
  91. return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
  92. }
  93. func (llm *gguf) KV() KV {
  94. return llm.kv
  95. }
  96. func (llm *gguf) Tensors() Tensors {
  97. return llm.tensors
  98. }
  99. func (llm *gguf) numTensor() uint64 {
  100. switch llm.Version {
  101. case 1:
  102. return uint64(llm.V1.NumTensor)
  103. case 2:
  104. return llm.V2.NumTensor
  105. default:
  106. return llm.V3.NumTensor
  107. }
  108. }
  109. func (llm *gguf) numKV() uint64 {
  110. switch llm.Version {
  111. case 1:
  112. return uint64(llm.V1.NumKV)
  113. case 2:
  114. return llm.V2.NumKV
  115. default:
  116. return llm.V3.NumKV
  117. }
  118. }
  119. func (llm *gguf) Decode(rs io.ReadSeeker) error {
  120. // decode key-values
  121. for i := 0; uint64(i) < llm.numKV(); i++ {
  122. k, err := readGGUFString(llm, rs)
  123. if err != nil {
  124. return err
  125. }
  126. t, err := readGGUF[uint32](llm, rs)
  127. if err != nil {
  128. return err
  129. }
  130. var v any
  131. switch t {
  132. case ggufTypeUint8:
  133. v, err = readGGUF[uint8](llm, rs)
  134. case ggufTypeInt8:
  135. v, err = readGGUF[int8](llm, rs)
  136. case ggufTypeUint16:
  137. v, err = readGGUF[uint16](llm, rs)
  138. case ggufTypeInt16:
  139. v, err = readGGUF[int16](llm, rs)
  140. case ggufTypeUint32:
  141. v, err = readGGUF[uint32](llm, rs)
  142. case ggufTypeInt32:
  143. v, err = readGGUF[int32](llm, rs)
  144. case ggufTypeUint64:
  145. v, err = readGGUF[uint64](llm, rs)
  146. case ggufTypeInt64:
  147. v, err = readGGUF[int64](llm, rs)
  148. case ggufTypeFloat32:
  149. v, err = readGGUF[float32](llm, rs)
  150. case ggufTypeFloat64:
  151. v, err = readGGUF[float64](llm, rs)
  152. case ggufTypeBool:
  153. v, err = readGGUF[bool](llm, rs)
  154. case ggufTypeString:
  155. v, err = readGGUFString(llm, rs)
  156. case ggufTypeArray:
  157. v, err = readGGUFArray(llm, rs)
  158. default:
  159. return fmt.Errorf("invalid type: %d", t)
  160. }
  161. if err != nil {
  162. return err
  163. }
  164. llm.kv[k] = v
  165. }
  166. // decode tensors
  167. for i := 0; uint64(i) < llm.numTensor(); i++ {
  168. name, err := readGGUFString(llm, rs)
  169. if err != nil {
  170. return err
  171. }
  172. // dims is the number of dimensions in the tensor
  173. dims, err := readGGUF[uint32](llm, rs)
  174. if err != nil {
  175. return err
  176. }
  177. shape := [4]uint64{1, 1, 1, 1}
  178. for i := 0; uint32(i) < dims; i++ {
  179. shape[i], err = readGGUF[uint64](llm, rs)
  180. if err != nil {
  181. return err
  182. }
  183. }
  184. kind, err := readGGUF[uint32](llm, rs)
  185. if err != nil {
  186. return err
  187. }
  188. offset, err := readGGUF[uint64](llm, rs)
  189. if err != nil {
  190. return err
  191. }
  192. tensor := Tensor{
  193. Name: name,
  194. Kind: kind,
  195. Offset: offset,
  196. Shape: shape[:],
  197. }
  198. llm.tensors = append(llm.tensors, &tensor)
  199. llm.parameters += tensor.parameters()
  200. }
  201. // patch KV with parameter count
  202. llm.kv["general.parameter_count"] = llm.parameters
  203. alignment, ok := llm.kv["general.alignment"].(uint32)
  204. if !ok {
  205. alignment = 32
  206. }
  207. offset, err := rs.Seek(0, io.SeekCurrent)
  208. if err != nil {
  209. return err
  210. }
  211. padding := llm.padding(offset, int64(alignment))
  212. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  213. return err
  214. }
  215. for _, tensor := range llm.tensors {
  216. if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
  217. return err
  218. }
  219. padding := llm.padding(int64(tensor.Size()), int64(alignment))
  220. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  221. return err
  222. }
  223. }
  224. return nil
  225. }
  226. func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
  227. var t T
  228. err := binary.Read(r, llm.ByteOrder, &t)
  229. return t, err
  230. }
  231. func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
  232. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  233. return err
  234. }
  235. return binary.Write(w, llm.ByteOrder, v)
  236. }
  237. func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
  238. var length uint64
  239. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  240. return "", err
  241. }
  242. var b bytes.Buffer
  243. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  244. return "", err
  245. }
  246. // gguf v1 strings are null-terminated
  247. b.Truncate(b.Len() - 1)
  248. return b.String(), nil
  249. }
  250. func readGGUFString(llm *gguf, r io.Reader) (string, error) {
  251. if llm.Version == 1 {
  252. return readGGUFV1String(llm, r)
  253. }
  254. var length uint64
  255. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  256. return "", err
  257. }
  258. var b bytes.Buffer
  259. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  260. return "", err
  261. }
  262. return b.String(), nil
  263. }
  264. func writeGGUFString(llm *gguf, w io.Writer, s string) error {
  265. if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
  266. return err
  267. }
  268. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  269. return err
  270. }
  271. _, err := io.Copy(w, strings.NewReader(s))
  272. return err
  273. }
  274. func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
  275. t, err := readGGUF[uint32](llm, r)
  276. if err != nil {
  277. return nil, err
  278. }
  279. n, err := readGGUF[uint32](llm, r)
  280. if err != nil {
  281. return nil, err
  282. }
  283. for i := 0; uint32(i) < n; i++ {
  284. var e any
  285. switch t {
  286. case ggufTypeUint8:
  287. e, err = readGGUF[uint8](llm, r)
  288. case ggufTypeInt8:
  289. e, err = readGGUF[int8](llm, r)
  290. case ggufTypeUint16:
  291. e, err = readGGUF[uint16](llm, r)
  292. case ggufTypeInt16:
  293. e, err = readGGUF[int16](llm, r)
  294. case ggufTypeUint32:
  295. e, err = readGGUF[uint32](llm, r)
  296. case ggufTypeInt32:
  297. e, err = readGGUF[int32](llm, r)
  298. case ggufTypeUint64:
  299. e, err = readGGUF[uint64](llm, r)
  300. case ggufTypeInt64:
  301. e, err = readGGUF[int64](llm, r)
  302. case ggufTypeFloat32:
  303. e, err = readGGUF[float32](llm, r)
  304. case ggufTypeFloat64:
  305. e, err = readGGUF[float64](llm, r)
  306. case ggufTypeBool:
  307. e, err = readGGUF[bool](llm, r)
  308. case ggufTypeString:
  309. e, err = readGGUFV1String(llm, r)
  310. default:
  311. return nil, fmt.Errorf("invalid array type: %d", t)
  312. }
  313. if err != nil {
  314. return nil, err
  315. }
  316. a = append(a, e)
  317. }
  318. return
  319. }
  320. func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
  321. if llm.Version == 1 {
  322. return readGGUFV1Array(llm, r)
  323. }
  324. t, err := readGGUF[uint32](llm, r)
  325. if err != nil {
  326. return nil, err
  327. }
  328. n, err := readGGUF[uint64](llm, r)
  329. if err != nil {
  330. return nil, err
  331. }
  332. for i := 0; uint64(i) < n; i++ {
  333. var e any
  334. switch t {
  335. case ggufTypeUint8:
  336. e, err = readGGUF[uint8](llm, r)
  337. case ggufTypeInt8:
  338. e, err = readGGUF[int8](llm, r)
  339. case ggufTypeUint16:
  340. e, err = readGGUF[uint16](llm, r)
  341. case ggufTypeInt16:
  342. e, err = readGGUF[int16](llm, r)
  343. case ggufTypeUint32:
  344. e, err = readGGUF[uint32](llm, r)
  345. case ggufTypeInt32:
  346. e, err = readGGUF[int32](llm, r)
  347. case ggufTypeUint64:
  348. e, err = readGGUF[uint64](llm, r)
  349. case ggufTypeInt64:
  350. e, err = readGGUF[int64](llm, r)
  351. case ggufTypeFloat32:
  352. e, err = readGGUF[float32](llm, r)
  353. case ggufTypeFloat64:
  354. e, err = readGGUF[float64](llm, r)
  355. case ggufTypeBool:
  356. e, err = readGGUF[bool](llm, r)
  357. case ggufTypeString:
  358. e, err = readGGUFString(llm, r)
  359. default:
  360. return nil, fmt.Errorf("invalid array type: %d", t)
  361. }
  362. if err != nil {
  363. return nil, err
  364. }
  365. a = append(a, e)
  366. }
  367. return
  368. }
  369. func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
  370. if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
  371. return err
  372. }
  373. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  374. return err
  375. }
  376. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  377. return err
  378. }
  379. for _, e := range s {
  380. if err := binary.Write(w, llm.ByteOrder, e); err != nil {
  381. return err
  382. }
  383. }
  384. return nil
  385. }
  386. var ggufKVOrder = map[string][]string{
  387. "llama": {
  388. "general.architecture",
  389. "general.name",
  390. "llama.vocab_size",
  391. "llama.context_length",
  392. "llama.embedding_length",
  393. "llama.block_count",
  394. "llama.feed_forward_length",
  395. "llama.attention.head_count",
  396. "llama.attention.head_count_kv",
  397. "llama.attention.layer_norm_rms_epsilon",
  398. "llama.rope.freq_base",
  399. "llama.rope.dimension_count",
  400. "llama.expert_count",
  401. "llama.expert_used_count",
  402. "gemma.context_length",
  403. "gemma.embedding_length",
  404. "gemma.block_count",
  405. "gemma.feed_forward_length",
  406. "gemma.attention.head_count",
  407. "gemma.attention.head_count_kv",
  408. "gemma.attention.layer_norm_rms_epsilon",
  409. "gemma.attention.key_length",
  410. "gemma.attention.value_length",
  411. "general.file_type",
  412. "tokenizer.ggml.pre",
  413. "tokenizer.ggml.model",
  414. "tokenizer.ggml.tokens",
  415. "tokenizer.ggml.scores",
  416. "tokenizer.ggml.merges",
  417. "tokenizer.ggml.token_type",
  418. "tokenizer.ggml.bos_token_id",
  419. "tokenizer.ggml.eos_token_id",
  420. "tokenizer.ggml.unknown_token_id",
  421. "tokenizer.ggml.padding_token_id",
  422. "tokenizer.ggml.add_bos_token",
  423. "tokenizer.ggml.add_eos_token",
  424. "tokenizer.chat_template",
  425. },
  426. }
  427. func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
  428. switch llm.Version {
  429. case 3:
  430. llm.V3.NumTensor = uint64(len(tensors))
  431. llm.V3.NumKV = uint64(len(kv))
  432. default:
  433. return fmt.Errorf("not implemented: ggufv%d", llm.Version)
  434. }
  435. if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
  436. return err
  437. }
  438. if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
  439. return err
  440. }
  441. if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
  442. return err
  443. }
  444. if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
  445. return err
  446. }
  447. kvCheck := make(map[string]bool)
  448. for k := range kv {
  449. kvCheck[k] = false
  450. }
  451. for _, k := range ggufKVOrder["llama"] {
  452. v, ok := kv[k]
  453. if !ok {
  454. continue
  455. }
  456. kvCheck[k] = true
  457. if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
  458. return err
  459. }
  460. if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
  461. return err
  462. }
  463. var err error
  464. switch v := v.(type) {
  465. case uint32:
  466. err = writeGGUF(llm, ws, ggufTypeUint32, v)
  467. case float32:
  468. err = writeGGUF(llm, ws, ggufTypeFloat32, v)
  469. case bool:
  470. err = writeGGUF(llm, ws, ggufTypeBool, v)
  471. case string:
  472. err = writeGGUFString(llm, ws, v)
  473. case []int32:
  474. err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
  475. case []uint32:
  476. err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
  477. case []float32:
  478. err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
  479. case []string:
  480. if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
  481. return err
  482. }
  483. if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
  484. return err
  485. }
  486. if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
  487. return err
  488. }
  489. for _, e := range v {
  490. if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
  491. return err
  492. }
  493. if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
  494. return err
  495. }
  496. }
  497. default:
  498. return fmt.Errorf("improper type for '%s'", k)
  499. }
  500. if err != nil {
  501. return err
  502. }
  503. }
  504. for k, v := range kvCheck {
  505. if !v {
  506. return fmt.Errorf("Didn't know how to write kv %s", k)
  507. }
  508. }
  509. for _, tensor := range tensors {
  510. if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
  511. return err
  512. }
  513. if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
  514. return err
  515. }
  516. var dims int
  517. for cnt := range len(tensor.Shape) {
  518. if tensor.Shape[cnt] > 0 {
  519. dims++
  520. }
  521. }
  522. if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
  523. return err
  524. }
  525. for i := range dims {
  526. if err := binary.Write(ws, llm.ByteOrder, tensor.Shape[dims-1-i]); err != nil {
  527. return err
  528. }
  529. }
  530. if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
  531. return err
  532. }
  533. if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
  534. return err
  535. }
  536. }
  537. var alignment int64 = 32
  538. for _, tensor := range tensors {
  539. offset, err := ws.Seek(0, io.SeekCurrent)
  540. if err != nil {
  541. return err
  542. }
  543. padding := llm.padding(offset, alignment)
  544. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
  545. return err
  546. }
  547. if _, err := tensor.WriteTo(ws); err != nil {
  548. return err
  549. }
  550. }
  551. return nil
  552. }
  553. func (gguf) padding(offset, align int64) int64 {
  554. return (align - offset%align) % align
  555. }