gguf.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "strings"
  8. )
  9. type containerGGUF struct {
  10. ByteOrder binary.ByteOrder
  11. Version uint32
  12. V1 struct {
  13. NumTensor uint32
  14. NumKV uint32
  15. }
  16. V2 struct {
  17. NumTensor uint64
  18. NumKV uint64
  19. }
  20. V3 struct {
  21. NumTensor uint64
  22. NumKV uint64
  23. }
  24. }
  25. func (c *containerGGUF) Name() string {
  26. return "gguf"
  27. }
  28. func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
  29. if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
  30. return nil, err
  31. }
  32. var err error
  33. switch c.Version {
  34. case 1:
  35. err = binary.Read(rs, c.ByteOrder, &c.V1)
  36. case 2:
  37. err = binary.Read(rs, c.ByteOrder, &c.V2)
  38. default:
  39. err = binary.Read(rs, c.ByteOrder, &c.V3)
  40. }
  41. if err != nil {
  42. return nil, err
  43. }
  44. model := newGGUF(c)
  45. if err := model.Decode(rs); err != nil {
  46. return nil, err
  47. }
  48. return model, nil
  49. }
  50. const (
  51. _ uint32 = iota
  52. GGUFTokenNormal
  53. GGUFTokenUnknown
  54. GGUFTokenControl
  55. GGUFTokenUserDefined
  56. GGUFTokenUnused
  57. GGUFTokenByte
  58. )
  59. const (
  60. ggufTypeUint8 uint32 = iota
  61. ggufTypeInt8
  62. ggufTypeUint16
  63. ggufTypeInt16
  64. ggufTypeUint32
  65. ggufTypeInt32
  66. ggufTypeFloat32
  67. ggufTypeBool
  68. ggufTypeString
  69. ggufTypeArray
  70. ggufTypeUint64
  71. ggufTypeInt64
  72. ggufTypeFloat64
  73. )
  74. type gguf struct {
  75. *containerGGUF
  76. kv KV
  77. tensors []*Tensor
  78. parameters uint64
  79. }
  80. func newGGUF(container *containerGGUF) *gguf {
  81. return &gguf{
  82. containerGGUF: container,
  83. kv: make(KV),
  84. }
  85. }
  86. func NewGGUFV3(bo binary.ByteOrder) *gguf {
  87. return newGGUF(&containerGGUF{ByteOrder: bo, Version: 3})
  88. }
  89. func (llm *gguf) KV() KV {
  90. return llm.kv
  91. }
  92. func (llm *gguf) Tensors() Tensors {
  93. return llm.tensors
  94. }
  95. func (llm *gguf) numTensor() uint64 {
  96. switch llm.Version {
  97. case 1:
  98. return uint64(llm.V1.NumTensor)
  99. case 2:
  100. return llm.V2.NumTensor
  101. default:
  102. return llm.V3.NumTensor
  103. }
  104. }
  105. func (llm *gguf) numKV() uint64 {
  106. switch llm.Version {
  107. case 1:
  108. return uint64(llm.V1.NumKV)
  109. case 2:
  110. return llm.V2.NumKV
  111. default:
  112. return llm.V3.NumKV
  113. }
  114. }
  115. func (llm *gguf) Decode(rs io.ReadSeeker) error {
  116. // decode key-values
  117. for i := 0; uint64(i) < llm.numKV(); i++ {
  118. k, err := readGGUFString(llm, rs)
  119. if err != nil {
  120. return err
  121. }
  122. t, err := readGGUF[uint32](llm, rs)
  123. if err != nil {
  124. return err
  125. }
  126. var v any
  127. switch t {
  128. case ggufTypeUint8:
  129. v, err = readGGUF[uint8](llm, rs)
  130. case ggufTypeInt8:
  131. v, err = readGGUF[int8](llm, rs)
  132. case ggufTypeUint16:
  133. v, err = readGGUF[uint16](llm, rs)
  134. case ggufTypeInt16:
  135. v, err = readGGUF[int16](llm, rs)
  136. case ggufTypeUint32:
  137. v, err = readGGUF[uint32](llm, rs)
  138. case ggufTypeInt32:
  139. v, err = readGGUF[int32](llm, rs)
  140. case ggufTypeUint64:
  141. v, err = readGGUF[uint64](llm, rs)
  142. case ggufTypeInt64:
  143. v, err = readGGUF[int64](llm, rs)
  144. case ggufTypeFloat32:
  145. v, err = readGGUF[float32](llm, rs)
  146. case ggufTypeFloat64:
  147. v, err = readGGUF[float64](llm, rs)
  148. case ggufTypeBool:
  149. v, err = readGGUF[bool](llm, rs)
  150. case ggufTypeString:
  151. v, err = readGGUFString(llm, rs)
  152. case ggufTypeArray:
  153. v, err = readGGUFArray(llm, rs)
  154. default:
  155. return fmt.Errorf("invalid type: %d", t)
  156. }
  157. if err != nil {
  158. return err
  159. }
  160. llm.kv[k] = v
  161. }
  162. // decode tensors
  163. for i := 0; uint64(i) < llm.numTensor(); i++ {
  164. name, err := readGGUFString(llm, rs)
  165. if err != nil {
  166. return err
  167. }
  168. // dims is the number of dimensions in the tensor
  169. dims, err := readGGUF[uint32](llm, rs)
  170. if err != nil {
  171. return err
  172. }
  173. shape := [4]uint64{1, 1, 1, 1}
  174. for i := 0; uint32(i) < dims; i++ {
  175. shape[i], err = readGGUF[uint64](llm, rs)
  176. if err != nil {
  177. return err
  178. }
  179. }
  180. kind, err := readGGUF[uint32](llm, rs)
  181. if err != nil {
  182. return err
  183. }
  184. offset, err := readGGUF[uint64](llm, rs)
  185. if err != nil {
  186. return err
  187. }
  188. tensor := Tensor{
  189. Name: name,
  190. Kind: kind,
  191. Offset: offset,
  192. Shape: shape[:],
  193. }
  194. llm.tensors = append(llm.tensors, &tensor)
  195. llm.parameters += tensor.parameters()
  196. }
  197. // patch KV with parameter count
  198. llm.kv["general.parameter_count"] = llm.parameters
  199. alignment, ok := llm.kv["general.alignment"].(uint32)
  200. if !ok {
  201. alignment = 32
  202. }
  203. offset, err := rs.Seek(0, io.SeekCurrent)
  204. if err != nil {
  205. return err
  206. }
  207. padding := llm.padding(offset, int64(alignment))
  208. if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
  209. return err
  210. }
  211. for _, tensor := range llm.tensors {
  212. padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  213. if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
  214. return err
  215. }
  216. }
  217. return nil
  218. }
  219. func readGGUF[T any](llm *gguf, r io.Reader) (T, error) {
  220. var t T
  221. err := binary.Read(r, llm.ByteOrder, &t)
  222. return t, err
  223. }
  224. func writeGGUF[V any](llm *gguf, w io.Writer, t uint32, v V) error {
  225. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  226. return err
  227. }
  228. return binary.Write(w, llm.ByteOrder, v)
  229. }
  230. func readGGUFV1String(llm *gguf, r io.Reader) (string, error) {
  231. var length uint64
  232. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  233. return "", err
  234. }
  235. var b bytes.Buffer
  236. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  237. return "", err
  238. }
  239. // gguf v1 strings are null-terminated
  240. b.Truncate(b.Len() - 1)
  241. return b.String(), nil
  242. }
  243. func readGGUFString(llm *gguf, r io.Reader) (string, error) {
  244. if llm.Version == 1 {
  245. return readGGUFV1String(llm, r)
  246. }
  247. var length uint64
  248. if err := binary.Read(r, llm.ByteOrder, &length); err != nil {
  249. return "", err
  250. }
  251. var b bytes.Buffer
  252. if _, err := io.CopyN(&b, r, int64(length)); err != nil {
  253. return "", err
  254. }
  255. return b.String(), nil
  256. }
  257. func writeGGUFString(llm *gguf, w io.Writer, s string) error {
  258. if err := binary.Write(w, llm.ByteOrder, ggufTypeString); err != nil {
  259. return err
  260. }
  261. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  262. return err
  263. }
  264. _, err := io.Copy(w, strings.NewReader(s))
  265. return err
  266. }
  267. func readGGUFV1Array(llm *gguf, r io.Reader) (a []any, err error) {
  268. t, err := readGGUF[uint32](llm, r)
  269. if err != nil {
  270. return nil, err
  271. }
  272. n, err := readGGUF[uint32](llm, r)
  273. if err != nil {
  274. return nil, err
  275. }
  276. for i := 0; uint32(i) < n; i++ {
  277. var e any
  278. switch t {
  279. case ggufTypeUint8:
  280. e, err = readGGUF[uint8](llm, r)
  281. case ggufTypeInt8:
  282. e, err = readGGUF[int8](llm, r)
  283. case ggufTypeUint16:
  284. e, err = readGGUF[uint16](llm, r)
  285. case ggufTypeInt16:
  286. e, err = readGGUF[int16](llm, r)
  287. case ggufTypeUint32:
  288. e, err = readGGUF[uint32](llm, r)
  289. case ggufTypeInt32:
  290. e, err = readGGUF[int32](llm, r)
  291. case ggufTypeUint64:
  292. e, err = readGGUF[uint64](llm, r)
  293. case ggufTypeInt64:
  294. e, err = readGGUF[int64](llm, r)
  295. case ggufTypeFloat32:
  296. e, err = readGGUF[float32](llm, r)
  297. case ggufTypeFloat64:
  298. e, err = readGGUF[float64](llm, r)
  299. case ggufTypeBool:
  300. e, err = readGGUF[bool](llm, r)
  301. case ggufTypeString:
  302. e, err = readGGUFV1String(llm, r)
  303. default:
  304. return nil, fmt.Errorf("invalid array type: %d", t)
  305. }
  306. if err != nil {
  307. return nil, err
  308. }
  309. a = append(a, e)
  310. }
  311. return
  312. }
  313. func readGGUFArray(llm *gguf, r io.Reader) (a []any, err error) {
  314. if llm.Version == 1 {
  315. return readGGUFV1Array(llm, r)
  316. }
  317. t, err := readGGUF[uint32](llm, r)
  318. if err != nil {
  319. return nil, err
  320. }
  321. n, err := readGGUF[uint64](llm, r)
  322. if err != nil {
  323. return nil, err
  324. }
  325. for i := 0; uint64(i) < n; i++ {
  326. var e any
  327. switch t {
  328. case ggufTypeUint8:
  329. e, err = readGGUF[uint8](llm, r)
  330. case ggufTypeInt8:
  331. e, err = readGGUF[int8](llm, r)
  332. case ggufTypeUint16:
  333. e, err = readGGUF[uint16](llm, r)
  334. case ggufTypeInt16:
  335. e, err = readGGUF[int16](llm, r)
  336. case ggufTypeUint32:
  337. e, err = readGGUF[uint32](llm, r)
  338. case ggufTypeInt32:
  339. e, err = readGGUF[int32](llm, r)
  340. case ggufTypeUint64:
  341. e, err = readGGUF[uint64](llm, r)
  342. case ggufTypeInt64:
  343. e, err = readGGUF[int64](llm, r)
  344. case ggufTypeFloat32:
  345. e, err = readGGUF[float32](llm, r)
  346. case ggufTypeFloat64:
  347. e, err = readGGUF[float64](llm, r)
  348. case ggufTypeBool:
  349. e, err = readGGUF[bool](llm, r)
  350. case ggufTypeString:
  351. e, err = readGGUFString(llm, r)
  352. default:
  353. return nil, fmt.Errorf("invalid array type: %d", t)
  354. }
  355. if err != nil {
  356. return nil, err
  357. }
  358. a = append(a, e)
  359. }
  360. return
  361. }
  362. func writeGGUFArray[S ~[]E, E any](llm *gguf, w io.Writer, t uint32, s S) error {
  363. if err := binary.Write(w, llm.ByteOrder, ggufTypeArray); err != nil {
  364. return err
  365. }
  366. if err := binary.Write(w, llm.ByteOrder, t); err != nil {
  367. return err
  368. }
  369. if err := binary.Write(w, llm.ByteOrder, uint64(len(s))); err != nil {
  370. return err
  371. }
  372. for _, e := range s {
  373. if err := binary.Write(w, llm.ByteOrder, e); err != nil {
  374. return err
  375. }
  376. }
  377. return nil
  378. }
  379. var ggufKVOrder = map[string][]string{
  380. "llama": {
  381. "general.architecture",
  382. "general.name",
  383. "llama.context_length",
  384. "llama.embedding_length",
  385. "llama.block_count",
  386. "llama.feed_forward_length",
  387. "llama.rope.dimension_count",
  388. "llama.attention.head_count",
  389. "llama.attention.head_count_kv",
  390. "llama.attention.layer_norm_rms_epsilon",
  391. "llama.rope.freq_base",
  392. "gemma.context_length",
  393. "gemma.embedding_length",
  394. "gemma.block_count",
  395. "gemma.feed_forward_length",
  396. "gemma.attention.head_count",
  397. "gemma.attention.head_count_kv",
  398. "gemma.attention.layer_norm_rms_epsilon",
  399. "gemma.attention.key_length",
  400. "gemma.attention.value_length",
  401. "general.file_type",
  402. "tokenizer.ggml.model",
  403. "tokenizer.ggml.tokens",
  404. "tokenizer.ggml.scores",
  405. "tokenizer.ggml.token_type",
  406. "tokenizer.ggml.bos_token_id",
  407. "tokenizer.ggml.eos_token_id",
  408. "tokenizer.ggml.unknown_token_id",
  409. "tokenizer.ggml.padding_token_id",
  410. "tokenizer.ggml.add_bos_token",
  411. "tokenizer.ggml.add_eos_token",
  412. "tokenizer.chat_template",
  413. },
  414. }
  415. func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
  416. switch llm.Version {
  417. case 3:
  418. llm.V3.NumTensor = uint64(len(tensors))
  419. llm.V3.NumKV = uint64(len(kv))
  420. default:
  421. return fmt.Errorf("not implemented: ggufv%d", llm.Version)
  422. }
  423. if err := binary.Write(ws, llm.ByteOrder, []byte("GGUF")); err != nil {
  424. return err
  425. }
  426. if err := binary.Write(ws, llm.ByteOrder, llm.Version); err != nil {
  427. return err
  428. }
  429. if err := binary.Write(ws, llm.ByteOrder, llm.numTensor()); err != nil {
  430. return err
  431. }
  432. if err := binary.Write(ws, llm.ByteOrder, llm.numKV()); err != nil {
  433. return err
  434. }
  435. for _, k := range ggufKVOrder["llama"] {
  436. v, ok := kv[k]
  437. if !ok {
  438. continue
  439. }
  440. if err := binary.Write(ws, llm.ByteOrder, uint64(len(k))); err != nil {
  441. return err
  442. }
  443. if err := binary.Write(ws, llm.ByteOrder, []byte(k)); err != nil {
  444. return err
  445. }
  446. var err error
  447. switch v := v.(type) {
  448. case uint32:
  449. err = writeGGUF(llm, ws, ggufTypeUint32, v)
  450. case float32:
  451. err = writeGGUF(llm, ws, ggufTypeFloat32, v)
  452. case bool:
  453. err = writeGGUF(llm, ws, ggufTypeBool, v)
  454. case string:
  455. err = writeGGUFString(llm, ws, v)
  456. case []int32:
  457. err = writeGGUFArray(llm, ws, ggufTypeInt32, v)
  458. case []uint32:
  459. err = writeGGUFArray(llm, ws, ggufTypeUint32, v)
  460. case []float32:
  461. err = writeGGUFArray(llm, ws, ggufTypeFloat32, v)
  462. case []string:
  463. if err := binary.Write(ws, llm.ByteOrder, ggufTypeArray); err != nil {
  464. return err
  465. }
  466. if err := binary.Write(ws, llm.ByteOrder, ggufTypeString); err != nil {
  467. return err
  468. }
  469. if err := binary.Write(ws, llm.ByteOrder, uint64(len(v))); err != nil {
  470. return err
  471. }
  472. for _, e := range v {
  473. if err := binary.Write(ws, llm.ByteOrder, uint64(len(e))); err != nil {
  474. return err
  475. }
  476. if err := binary.Write(ws, llm.ByteOrder, []byte(e)); err != nil {
  477. return err
  478. }
  479. }
  480. }
  481. if err != nil {
  482. return err
  483. }
  484. }
  485. for _, tensor := range tensors {
  486. if err := binary.Write(ws, llm.ByteOrder, uint64(len(tensor.Name))); err != nil {
  487. return err
  488. }
  489. if err := binary.Write(ws, llm.ByteOrder, []byte(tensor.Name)); err != nil {
  490. return err
  491. }
  492. dims := 1
  493. if tensor.Shape[1] > 0 {
  494. dims = 2
  495. }
  496. if err := binary.Write(ws, llm.ByteOrder, uint32(dims)); err != nil {
  497. return err
  498. }
  499. for i := 0; i < dims; i++ {
  500. if err := binary.Write(ws, llm.ByteOrder, uint64(tensor.Shape[dims-1-i])); err != nil {
  501. return err
  502. }
  503. }
  504. if err := binary.Write(ws, llm.ByteOrder, tensor.Kind); err != nil {
  505. return err
  506. }
  507. if err := binary.Write(ws, llm.ByteOrder, tensor.Offset); err != nil {
  508. return err
  509. }
  510. }
  511. offset, err := ws.Seek(0, io.SeekCurrent)
  512. if err != nil {
  513. return err
  514. }
  515. padding := llm.padding(offset, 32)
  516. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
  517. return err
  518. }
  519. for _, tensor := range tensors {
  520. if _, err := tensor.WriteTo(ws); err != nil {
  521. return err
  522. }
  523. offset, err := ws.Seek(0, io.SeekCurrent)
  524. if err != nil {
  525. return err
  526. }
  527. padding := llm.padding(offset, 32)
  528. if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding-offset))); err != nil {
  529. return err
  530. }
  531. }
  532. return nil
  533. }
  534. func (gguf) padding(offset, align int64) int64 {
  535. return (offset + align - 1) / align * align
  536. }