gguf.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "log/slog"
  9. )
  10. type containerGGUF struct {
  11. ByteOrder binary.ByteOrder
  12. Version uint32
  13. V1 struct {
  14. NumTensor uint32
  15. NumKV uint32
  16. }
  17. V2 struct {
  18. NumTensor uint64
  19. NumKV uint64
  20. }
  21. V3 struct {
  22. NumTensor uint64
  23. NumKV uint64
  24. }
  25. }
  26. func (c *containerGGUF) Name() string {
  27. return "gguf"
  28. }
  29. func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
  30. if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
  31. return nil, err
  32. }
  33. var err error
  34. switch c.Version {
  35. case 1:
  36. err = binary.Read(rs, c.ByteOrder, &c.V1)
  37. case 2:
  38. err = binary.Read(rs, c.ByteOrder, &c.V2)
  39. default:
  40. err = binary.Read(rs, c.ByteOrder, &c.V3)
  41. }
  42. if err != nil {
  43. return nil, err
  44. }
  45. model := newGGUF(c)
  46. slog.Debug(fmt.Sprintf("model = %#v", model))
  47. if err := model.decode(rs); err != nil {
  48. return nil, err
  49. }
  50. return model, nil
  51. }
  52. const (
  53. _ uint32 = iota
  54. GGUFTokenNormal
  55. GGUFTokenUnknown
  56. GGUFTokenControl
  57. GGUFTokenUserDefined
  58. GGUFTokenUnused
  59. GGUFTokenByte
  60. )
  61. const (
  62. ggufTypeUint8 uint32 = iota
  63. ggufTypeInt8
  64. ggufTypeUint16
  65. ggufTypeInt16
  66. ggufTypeUint32
  67. ggufTypeInt32
  68. ggufTypeFloat32
  69. ggufTypeBool
  70. ggufTypeString
  71. ggufTypeArray
  72. ggufTypeUint64
  73. ggufTypeInt64
  74. ggufTypeFloat64
  75. )
  76. type gguf struct {
  77. *containerGGUF
  78. offset int64
  79. kv KV
  80. tensors []*Tensor
  81. parameters uint64
  82. }
  83. func newGGUF(container *containerGGUF) *gguf {
  84. return &gguf{
  85. containerGGUF: container,
  86. kv: make(KV),
  87. }
  88. }
  89. func NewGGUFV3(bo binary.ByteOrder) *gguf {
  90. return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
  91. }
  92. func (llm *gguf) KV() KV {
  93. return llm.kv
  94. }
  95. func (llm *gguf) Tensors() Tensors {
  96. return llm.tensors
  97. }
  98. func (llm *gguf) Offset() int64 {
  99. return llm.offset
  100. }
  101. func (llm *gguf) numTensor() uint64 {
  102. switch llm.Version {
  103. case 1:
  104. return uint64(llm.V1.NumTensor)
  105. case 2:
  106. return llm.V2.NumTensor
  107. default:
  108. return llm.V3.NumTensor
  109. }
  110. }
  111. func (llm *gguf) numKV() uint64 {
  112. switch llm.Version {
  113. case 1:
  114. return uint64(llm.V1.NumKV)
  115. case 2:
  116. return llm.V2.NumKV
  117. default:
  118. return llm.V3.NumKV
  119. }
  120. }
  121. func (llm *gguf) decode(rs io.ReadSeeker) error {
  122. // decode key-values
  123. for i := 0; uint64(i) < llm.numKV(); i++ {
  124. k, err := readGGUFString(llm, rs)
  125. if err != nil {
  126. return err
  127. }
  128. t, err := readGGUF[uint32](llm, rs)
  129. if err != nil {
  130. return err
  131. }
  132. var v any
  133. switch t {
  134. case ggufTypeUint8:
  135. v, err = readGGUF[uint8](llm, rs)
  136. case ggufTypeInt8:
  137. v, err = readGGUF[int8](llm, rs)
  138. case ggufTypeUint16:
  139. v, err = readGGUF[uint16](llm, rs)
  140. case ggufTypeInt16:
  141. v, err = readGGUF[int16](llm, rs)
  142. case ggufTypeUint32:
  143. v, err = readGGUF[uint32](llm, rs)
  144. case ggufTypeInt32:
  145. v, err = readGGUF[int32](llm, rs)
  146. case ggufTypeUint64:
  147. v, err = readGGUF[uint64](llm, rs)
  148. case ggufTypeInt64:
  149. v, err = readGGUF[int64](llm, rs)
  150. case ggufTypeFloat32:
  151. v, err = readGGUF[float32](llm, rs)
  152. case ggufTypeFloat64:
  153. v, err = readGGUF[float64](llm, rs)
  154. case ggufTypeBool:
  155. v, err = readGGUF[bool](llm, rs)
  156. case ggufTypeString:
  157. v, err = readGGUFString(llm, rs)
  158. case ggufTypeArray:
  159. v, err = readGGUFArray(llm, rs)
  160. default:
  161. return fmt.Errorf("invalid type: %d", t)
  162. }
  163. if err != nil {
  164. return err
  165. }
  166. llm.kv[k] = v
  167. }
  168. // decode tensors
  169. for i := 0; uint64(i) < llm.numTensor(); i++ {
  170. name, err := readGGUFString(llm, rs)
  171. if err != nil {
  172. return err
  173. }
  174. // dims is the number of dimensions in the tensor
  175. dims, err := readGGUF[uint32](llm, rs)
  176. if err != nil {
  177. return err
  178. }
  179. shape := [4]uint64{1, 1, 1, 1}
  180. for i := 0; uint32(i) < dims; i++ {
  181. shape[i], err = readGGUF[uint64](llm, rs)
  182. if err != nil {
  183. return err
  184. }
  185. }
  186. kind, err := readGGUF[uint32](llm, rs)
  187. if err != nil {
  188. return err
  189. }
  190. offset, err := readGGUF[uint64](llm, rs)
  191. if err != nil {
  192. return err
  193. }
  194. tensor := Tensor{
  195. Name: name,
  196. Kind: kind,
  197. Offset: offset,
  198. Shape: shape[:],
  199. }
  200. llm.tensors = append(llm.tensors, &tensor)
  201. llm.parameters += tensor.parameters()
  202. }
  203. // patch KV with parameter count
  204. llm.kv["general.parameter_count"] = llm.parameters
  205. alignment, ok := llm.kv["general.alignment"].(uint32)
  206. if !ok {
  207. alignment = 32
  208. }
  209. offset, err := rs.Seek(0, io.SeekCurrent)
  210. if err != nil {
  211. return err
  212. }
  213. padding := llm.padding(offset, int64(alignment))
  214. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  215. return err
  216. }
  217. llm.offset = offset + padding
  218. for _, tensor := range llm.tensors {
  219. if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
  220. return err
  221. }
  222. padding := llm.padding(int64(tensor.size()), int64(alignment))
  223. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  224. return err
  225. }
  226. }
  227. return nil
  228. }
  229. func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
  230. var t T
  231. err := binary.Read(r, llm.ByteOrder, &t)
  232. return t, err
  233. }
  234. func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
  235. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  236. return err
  237. }
  238. return binary.Write(w, llm.ByteOrder, v)
  239. }
  240. func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
  241. var length uint64
  242. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  243. return "", err
  244. }
  245. var b bytes.Buffer
  246. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  247. return "", err
  248. }
  249. // gguf v1 strings are null-terminated
  250. b.Truncate(b.Len() - 1)
  251. return b.String(), nil
  252. }
  253. func readGGUFString(llm *gguf, r io.Reader) (string, error) {
  254. if llm.Version == 1 {
  255. return readGGUFV1String(llm, r)
  256. }
  257. var length uint64
  258. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  259. return "", err
  260. }
  261. var b bytes.Buffer
  262. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  263. return "", err
  264. }
  265. return b.String(), nil
  266. }
  267. func writeGGUFString(llm *gguf, w io.Writer, s string) error {
  268. if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
  269. return err
  270. }
  271. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  272. return err
  273. }
  274. _, err := io.Copy(w, strings.NewReader(s))
  275. return err
  276. }
  277. func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
  278. t, err := readGGUF[uint32](llm, r)
  279. if err != nil {
  280. return nil, err
  281. }
  282. n, err := readGGUF[uint32](llm, r)
  283. if err != nil {
  284. return nil, err
  285. }
  286. for i := 0; uint32(i) < n; i++ {
  287. var e any
  288. switch t {
  289. case ggufTypeUint8:
  290. e, err = readGGUF[uint8](llm, r)
  291. case ggufTypeInt8:
  292. e, err = readGGUF[int8](llm, r)
  293. case ggufTypeUint16:
  294. e, err = readGGUF[uint16](llm, r)
  295. case ggufTypeInt16:
  296. e, err = readGGUF[int16](llm, r)
  297. case ggufTypeUint32:
  298. e, err = readGGUF[uint32](llm, r)
  299. case ggufTypeInt32:
  300. e, err = readGGUF[int32](llm, r)
  301. case ggufTypeUint64:
  302. e, err = readGGUF[uint64](llm, r)
  303. case ggufTypeInt64:
  304. e, err = readGGUF[int64](llm, r)
  305. case ggufTypeFloat32:
  306. e, err = readGGUF[float32](llm, r)
  307. case ggufTypeFloat64:
  308. e, err = readGGUF[float64](llm, r)
  309. case ggufTypeBool:
  310. e, err = readGGUF[bool](llm, r)
  311. case ggufTypeString:
  312. e, err = readGGUFV1String(llm, r)
  313. default:
  314. return nil, fmt.Errorf("invalid array type: %d", t)
  315. }
  316. if err != nil {
  317. return nil, err
  318. }
  319. a = append(a, e)
  320. }
  321. return
  322. }
  323. func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
  324. if llm.Version == 1 {
  325. return readGGUFV1Array(llm, r)
  326. }
  327. t, err := readGGUF[uint32](llm, r)
  328. if err != nil {
  329. return nil, err
  330. }
  331. n, err := readGGUF[uint64](llm, r)
  332. if err != nil {
  333. return nil, err
  334. }
  335. for i := 0; uint64(i) < n; i++ {
  336. var e any
  337. switch t {
  338. case ggufTypeUint8:
  339. e, err = readGGUF[uint8](llm, r)
  340. case ggufTypeInt8:
  341. e, err = readGGUF[int8](llm, r)
  342. case ggufTypeUint16:
  343. e, err = readGGUF[uint16](llm, r)
  344. case ggufTypeInt16:
  345. e, err = readGGUF[int16](llm, r)
  346. case ggufTypeUint32:
  347. e, err = readGGUF[uint32](llm, r)
  348. case ggufTypeInt32:
  349. e, err = readGGUF[int32](llm, r)
  350. case ggufTypeUint64:
  351. e, err = readGGUF[uint64](llm, r)
  352. case ggufTypeInt64:
  353. e, err = readGGUF[int64](llm, r)
  354. case ggufTypeFloat32:
  355. e, err = readGGUF[float32](llm, r)
  356. case ggufTypeFloat64:
  357. e, err = readGGUF[float64](llm, r)
  358. case ggufTypeBool:
  359. e, err = readGGUF[bool](llm, r)
  360. case ggufTypeString:
  361. e, err = readGGUFString(llm, r)
  362. default:
  363. return nil, fmt.Errorf("invalid array type: %d", t)
  364. }
  365. if err != nil {
  366. return nil, err
  367. }
  368. a = append(a, e)
  369. }
  370. return
  371. }
  372. func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
  373. if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
  374. return err
  375. }
  376. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  377. return err
  378. }
  379. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  380. return err
  381. }
  382. for _, e := range s {
  383. if err := binary.Write(w, llm.ByteOrder, e); err != nil {
  384. return err
  385. }
  386. }
  387. return nil
  388. }
  389. var ggufKVOrder = map[string][]string{
  390. "llama": {
  391. "general.architecture",
  392. "general.name",
  393. "llama.vocab_size",
  394. "llama.context_length",
  395. "llama.embedding_length",
  396. "llama.block_count",
  397. "llama.feed_forward_length",
  398. "llama.attention.head_count",
  399. "llama.attention.head_count_kv",
  400. "llama.attention.layer_norm_rms_epsilon",
  401. "llama.rope.freq_base",
  402. "llama.rope.dimension_count",
  403. "llama.expert_count",
  404. "llama.expert_used_count",
  405. "gemma.context_length",
  406. "gemma.embedding_length",
  407. "gemma.block_count",
  408. "gemma.feed_forward_length",
  409. "gemma.attention.head_count",
  410. "gemma.attention.head_count_kv",
  411. "gemma.attention.layer_norm_rms_epsilon",
  412. "gemma.attention.key_length",
  413. "gemma.attention.value_length",
  414. "general.file_type",
  415. "tokenizer.ggml.model",
  416. "tokenizer.ggml.tokens",
  417. "tokenizer.ggml.scores",
  418. "tokenizer.ggml.token_type",
  419. "tokenizer.ggml.bos_token_id",
  420. "tokenizer.ggml.eos_token_id",
  421. "tokenizer.ggml.unknown_token_id",
  422. "tokenizer.ggml.padding_token_id",
  423. "tokenizer.ggml.add_bos_token",
  424. "tokenizer.ggml.add_eos_token",
  425. "tokenizer.chat_template",
  426. },
  427. }
  428. func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
  429. switch llm.Version {
  430. case 3:
  431. llm.V3.NumTensor = uint64(len(tensors))
  432. llm.V3.NumKV = uint64(len(kv))
  433. default:
  434. return fmt.Errorf("not implemented: ggufv%d", llm.Version)
  435. }
  436. if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
  437. return err
  438. }
  439. if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
  440. return err
  441. }
  442. if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
  443. return err
  444. }
  445. if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
  446. return err
  447. }
  448. kvCheck := make(map[string]bool)
  449. for k := range kv {
  450. kvCheck[k] = false
  451. }
  452. for _, k := range ggufKVOrder["llama"] {
  453. v, ok := kv[k]
  454. if !ok {
  455. continue
  456. }
  457. kvCheck[k] = true
  458. if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
  459. return err
  460. }
  461. if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
  462. return err
  463. }
  464. var err error
  465. switch v := v.(type) {
  466. case uint32:
  467. err = writeGGUF(llm, ws, ggufTypeUint32, v)
  468. case float32:
  469. err = writeGGUF(llm, ws, ggufTypeFloat32, v)
  470. case bool:
  471. err = writeGGUF(llm, ws, ggufTypeBool, v)
  472. case string:
  473. err = writeGGUFString(llm, ws, v)
  474. case []int32:
  475. err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
  476. case []uint32:
  477. err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
  478. case []float32:
  479. err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
  480. case []string:
  481. if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
  482. return err
  483. }
  484. if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
  485. return err
  486. }
  487. if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
  488. return err
  489. }
  490. for _, e := range v {
  491. if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
  492. return err
  493. }
  494. if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
  495. return err
  496. }
  497. }
  498. default:
  499. return fmt.Errorf("improper type for '%s'", k)
  500. }
  501. if err != nil {
  502. return err
  503. }
  504. }
  505. for k, v := range kvCheck {
  506. if !v {
  507. return fmt.Errorf("Didn't know how to write kv %s", k)
  508. }
  509. }
  510. for _, tensor := range tensors {
  511. if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
  512. return err
  513. }
  514. if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
  515. return err
  516. }
  517. dims := 0
  518. for cnt := 0; cnt < len(tensor.Shape); cnt++ {
  519. if tensor.Shape[cnt] > 0 {
  520. dims++
  521. }
  522. }
  523. if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
  524. return err
  525. }
  526. for i := 0; i < dims; i++ {
  527. if err := binary.Write(ws, llm.ByteOrder, uint64(tensor.Shape[dims-1-i])); err != nil {
  528. return err
  529. }
  530. }
  531. if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
  532. return err
  533. }
  534. if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
  535. return err
  536. }
  537. }
  538. offset, err := ws.Seek(0, io.SeekCurrent)
  539. if err != nil {
  540. return err
  541. }
  542. var alignment int64 = 32
  543. padding := llm.padding(offset, alignment)
  544. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
  545. return err
  546. }
  547. for _, tensor := range tensors {
  548. if _, err := tensor.WriteTo(ws); err != nil {
  549. return err
  550. }
  551. offset, err := ws.Seek(0, io.SeekCurrent)
  552. if err != nil {
  553. return err
  554. }
  555. padding := llm.padding(offset, alignment)
  556. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
  557. return err
  558. }
  559. }
  560. return nil
  561. }
  562. func (gguf) padding(offset, align int64) int64 {
  563. return (align - offset%align) % align
  564. }