gguf.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "log/slog"
  9. "os"
  10. "regexp"
  11. "github.com/d4l3k/go-bfloat16"
  12. "github.com/pdevine/tensor"
  13. "github.com/pdevine/tensor/native"
  14. "github.com/x448/float16"
  15. "github.com/ollama/ollama/format"
  16. )
  17. type ContainerGGUF struct {
  18. ByteOrder binary.ByteOrder
  19. Version uint32
  20. V1 struct {
  21. NumTensor uint32
  22. NumKV uint32
  23. }
  24. V2 struct {
  25. NumTensor uint64
  26. NumKV uint64
  27. }
  28. V3 struct {
  29. NumTensor uint64
  30. NumKV uint64
  31. }
  32. }
  33. func (c *ContainerGGUF) Name() string {
  34. return "gguf"
  35. }
  36. func (c *ContainerGGUF) Decode(rs io.ReadSeeker) (model, error) {
  37. binary.Read(rs, c.ByteOrder, &c.Version)
  38. switch c.Version {
  39. case 1:
  40. binary.Read(rs, c.ByteOrder, &c.V1)
  41. default:
  42. binary.Read(rs, c.ByteOrder, &c.V2)
  43. }
  44. model := NewGGUFModel(c)
  45. if err := model.Decode(rs); err != nil {
  46. return nil, err
  47. }
  48. return model, nil
  49. }
  50. const (
  51. _ uint32 = iota
  52. GGUFTokenNormal
  53. GGUFTokenUnknown
  54. GGUFTokenControl
  55. GGUFTokenUserDefined
  56. GGUFTokenUnused
  57. GGUFTokenByte
  58. )
  59. const (
  60. GGUFTypeUint8 uint32 = iota
  61. GGUFTypeInt8
  62. GGUFTypeUint16
  63. GGUFTypeInt16
  64. GGUFTypeUint32
  65. GGUFTypeInt32
  66. GGUFTypeFloat32
  67. GGUFTypeBool
  68. GGUFTypeString
  69. GGUFTypeArray
  70. GGUFTypeUint64
  71. GGUFTypeInt64
  72. GGUFTypeFloat64
  73. )
  74. type KV map[string]any
  75. type Tensor struct {
  76. Name string
  77. Kind uint32
  78. Offset uint64
  79. // shape is the number of elements in each dimension
  80. Shape []uint64
  81. FileName string
  82. OffsetPadding uint64
  83. FileOffsets []uint64
  84. }
  85. func (t Tensor) BlockSize() uint64 {
  86. switch {
  87. case t.Kind < 2:
  88. return 1
  89. case t.Kind < 10:
  90. return 32
  91. default:
  92. return 256
  93. }
  94. }
  95. func (t Tensor) TypeSize() uint64 {
  96. blockSize := t.BlockSize()
  97. switch t.Kind {
  98. case 0: // FP32
  99. return 4
  100. case 1: // FP16
  101. return 2
  102. case 2: // Q4_0
  103. return 2 + blockSize/2
  104. case 3: // Q4_1
  105. return 2 + 2 + blockSize/2
  106. case 6: // Q5_0
  107. return 2 + 4 + blockSize/2
  108. case 7: // Q5_1
  109. return 2 + 2 + 4 + blockSize/2
  110. case 8: // Q8_0
  111. return 2 + blockSize
  112. case 9: // Q8_1
  113. return 4 + 4 + blockSize
  114. case 10: // Q2_K
  115. return blockSize/16 + blockSize/4 + 2 + 2
  116. case 11: // Q3_K
  117. return blockSize/8 + blockSize/4 + 12 + 2
  118. case 12: // Q4_K
  119. return 2 + 2 + 12 + blockSize/2
  120. case 13: // Q5_K
  121. return 2 + 2 + 12 + blockSize/8 + blockSize/2
  122. case 14: // Q6_K
  123. return blockSize/2 + blockSize/4 + blockSize/16 + 2
  124. case 15: // Q8_K
  125. return 2 + blockSize + 2*blockSize/16
  126. case 16: // IQ2_XXS
  127. return 2 + 2*blockSize/8
  128. case 17: // IQ2_XS
  129. return 2 + 2*blockSize/8 + blockSize/32
  130. case 18: // IQ3_XXS
  131. return 2 + 3*blockSize/8
  132. default:
  133. return 0
  134. }
  135. }
  136. func (t Tensor) Parameters() uint64 {
  137. var count uint64 = 1
  138. for _, n := range t.Shape {
  139. count *= n
  140. }
  141. return count
  142. }
  143. func (t Tensor) Size() uint64 {
  144. return t.Parameters() * t.TypeSize() / t.BlockSize()
  145. }
  146. func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
  147. n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
  148. origShape := n.Shape().Clone()
  149. // reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
  150. if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
  151. return []uint16{}, err
  152. }
  153. if err := n.T(0, 2, 1, 3); err != nil {
  154. return []uint16{}, err
  155. }
  156. if err := n.Reshape(origShape...); err != nil {
  157. return []uint16{}, err
  158. }
  159. if err := n.Transpose(); err != nil {
  160. return []uint16{}, err
  161. }
  162. newN, err := native.SelectU16(n, 1)
  163. if err != nil {
  164. return []uint16{}, err
  165. }
  166. var fullTensor []uint16
  167. for _, v := range newN {
  168. fullTensor = append(fullTensor, v...)
  169. }
  170. return fullTensor, nil
  171. }
  172. type GGUFModel struct {
  173. *ContainerGGUF
  174. KV
  175. Tensors []Tensor
  176. parameters uint64
  177. }
  178. func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
  179. return &GGUFModel{
  180. ContainerGGUF: container,
  181. KV: make(KV),
  182. }
  183. }
  184. func (llm *GGUFModel) NumTensor() uint64 {
  185. if llm.Version == 1 {
  186. return uint64(llm.V1.NumTensor)
  187. }
  188. return llm.V2.NumTensor
  189. }
  190. func (llm *GGUFModel) NumKV() uint64 {
  191. if llm.Version == 1 {
  192. return uint64(llm.V1.NumKV)
  193. }
  194. return llm.V2.NumKV
  195. }
  196. func (llm *GGUFModel) ModelFamily() string {
  197. if t, ok := llm.KV["general.architecture"].(string); ok {
  198. return t
  199. }
  200. return "unknown"
  201. }
  202. func (llm *GGUFModel) ModelType() string {
  203. if llm.parameters > 0 {
  204. return format.HumanNumber(llm.parameters)
  205. }
  206. return "unknown"
  207. }
  208. func (llm *GGUFModel) FileType() string {
  209. if t, ok := llm.KV["general.file_type"].(uint32); ok {
  210. return fileType(t)
  211. }
  212. return "unknown"
  213. }
  214. func (llm *GGUFModel) Encode(f *os.File) error {
  215. // this mimics the order of the llama.cpp convert script
  216. kOrder := []string{
  217. "general.architecture",
  218. "general.name",
  219. "llama.context_length",
  220. "llama.embedding_length",
  221. "llama.block_count",
  222. "llama.feed_forward_length",
  223. "llama.rope.dimension_count",
  224. "llama.attention.head_count",
  225. "llama.attention.head_count_kv",
  226. "llama.attention.layer_norm_rms_epsilon",
  227. "llama.rope.freq_base",
  228. "general.file_type",
  229. "tokenizer.ggml.model",
  230. "tokenizer.ggml.tokens",
  231. "tokenizer.ggml.scores",
  232. "tokenizer.ggml.token_type",
  233. "tokenizer.ggml.bos_token_id",
  234. "tokenizer.ggml.eos_token_id",
  235. "tokenizer.ggml.unknown_token_id",
  236. "tokenizer.ggml.add_bos_token",
  237. "tokenizer.ggml.add_eos_token",
  238. "tokenizer.chat_template",
  239. }
  240. if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
  241. return err
  242. }
  243. if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
  244. return err
  245. }
  246. if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
  247. return err
  248. }
  249. if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
  250. return err
  251. }
  252. for _, k := range kOrder {
  253. val, ok := llm.KV[k]
  254. if !ok {
  255. continue
  256. }
  257. if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
  258. return err
  259. }
  260. if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
  261. return err
  262. }
  263. switch v := val.(type) {
  264. case uint32:
  265. if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
  266. return err
  267. }
  268. if err := llm.writeUint32(f, v); err != nil {
  269. return err
  270. }
  271. case float32:
  272. if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
  273. return err
  274. }
  275. if err := llm.writeF32(f, v); err != nil {
  276. return err
  277. }
  278. case bool:
  279. if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
  280. return err
  281. }
  282. if err := llm.writeBool(f, v); err != nil {
  283. return err
  284. }
  285. case string:
  286. if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
  287. return err
  288. }
  289. if err := llm.writeString(f, v); err != nil {
  290. return err
  291. }
  292. case []int32:
  293. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  294. return err
  295. }
  296. if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
  297. return err
  298. }
  299. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  300. return err
  301. }
  302. for _, i := range v {
  303. if err := llm.writeInt32(f, i); err != nil {
  304. return err
  305. }
  306. }
  307. case []uint32:
  308. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  309. return err
  310. }
  311. if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
  312. return err
  313. }
  314. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  315. return err
  316. }
  317. for _, i := range v {
  318. if err := llm.writeUint32(f, i); err != nil {
  319. return err
  320. }
  321. }
  322. case []float32:
  323. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  324. return err
  325. }
  326. if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
  327. return err
  328. }
  329. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  330. return err
  331. }
  332. for _, fl := range v {
  333. if err := llm.writeF32(f, fl); err != nil {
  334. return err
  335. }
  336. }
  337. case []string:
  338. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  339. return err
  340. }
  341. if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
  342. return err
  343. }
  344. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  345. return err
  346. }
  347. for _, s := range v {
  348. if err := llm.writeString(f, s); err != nil {
  349. return err
  350. }
  351. }
  352. }
  353. }
  354. // write layer metadata
  355. for _, t := range llm.Tensors {
  356. if err := llm.writeString(f, t.Name); err != nil {
  357. return err
  358. }
  359. // the dimensions of the tensor
  360. dims := 1
  361. if t.Shape[1] > 0 {
  362. dims = 2
  363. }
  364. if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
  365. return err
  366. }
  367. for i := 0; i < dims; i++ {
  368. if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
  369. return err
  370. }
  371. }
  372. if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
  373. return err
  374. }
  375. if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
  376. return err
  377. }
  378. }
  379. offset, terr := f.Seek(0, io.SeekCurrent)
  380. if terr != nil {
  381. return terr
  382. }
  383. slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
  384. if err := llm.writePadding(f, 32); err != nil {
  385. return err
  386. }
  387. var dataFile *os.File
  388. var currentFile string
  389. var err error
  390. for _, t := range llm.Tensors {
  391. if currentFile != t.FileName {
  392. if f != nil {
  393. dataFile.Close()
  394. }
  395. currentFile = t.FileName
  396. dataFile, err = os.Open(t.FileName)
  397. if err != nil {
  398. fmt.Println(err)
  399. return err
  400. }
  401. }
  402. dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), 0)
  403. pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
  404. re, err := regexp.Compile(pattern)
  405. if err != nil {
  406. return err
  407. }
  408. matches := re.FindAllStringSubmatch(t.Name, -1)
  409. if len(matches) > 0 {
  410. layerSize := t.FileOffsets[1] - t.FileOffsets[0]
  411. var err error
  412. tData := make([]uint16, layerSize/2)
  413. if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
  414. return err
  415. }
  416. layerType := matches[0][re.SubexpIndex("layer")]
  417. var heads uint32
  418. switch layerType {
  419. case "q":
  420. heads = llm.KV["llama.attention.head_count"].(uint32)
  421. case "k":
  422. heads = llm.KV["llama.attention.head_count_kv"].(uint32)
  423. if heads == 0 {
  424. heads = llm.KV["llama.attention.head_count"].(uint32)
  425. }
  426. }
  427. tData, err = t.Repack(tData, int(heads))
  428. if err != nil {
  429. return err
  430. }
  431. var buf []byte
  432. for _, n := range tData {
  433. buf = binary.LittleEndian.AppendUint16(buf, n)
  434. }
  435. tempBuf := make([]uint16, len(tData))
  436. tDataF32 := bfloat16.DecodeFloat32(buf)
  437. for cnt, v := range tDataF32 {
  438. tDataF16 := float16.Fromfloat32(v)
  439. tempBuf[cnt] = uint16(tDataF16)
  440. }
  441. if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
  442. return err
  443. }
  444. if err := llm.writePadding(f, 32); err != nil {
  445. return err
  446. }
  447. continue
  448. }
  449. remaining := t.FileOffsets[1] - t.FileOffsets[0]
  450. bufSize := uint64(10240)
  451. var finished bool
  452. for {
  453. data := make([]byte, min(bufSize, remaining))
  454. b, err := io.ReadFull(dataFile, data)
  455. remaining -= uint64(b)
  456. if errors.Is(err, io.EOF) || remaining <= 0 {
  457. finished = true
  458. } else if err != nil {
  459. return err
  460. }
  461. // convert bfloat16 -> ieee float32
  462. tDataF32 := bfloat16.DecodeFloat32(data)
  463. switch t.Kind {
  464. case 0:
  465. if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
  466. return err
  467. }
  468. case 1:
  469. // convert float32 -> float16
  470. tempBuf := make([]uint16, len(data)/2)
  471. for cnt, v := range tDataF32 {
  472. tDataF16 := float16.Fromfloat32(v)
  473. tempBuf[cnt] = uint16(tDataF16)
  474. }
  475. if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
  476. return err
  477. }
  478. }
  479. if finished {
  480. break
  481. }
  482. }
  483. if err := llm.writePadding(f, 32); err != nil {
  484. return err
  485. }
  486. }
  487. f.Close()
  488. return nil
  489. }
  490. func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
  491. // gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
  492. offset, err := f.Seek(0, io.SeekCurrent)
  493. if err != nil {
  494. return err
  495. }
  496. padding := ((offset + align - 1) / align) * align
  497. buf := make([]byte, padding-offset)
  498. if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
  499. return err
  500. }
  501. return nil
  502. }
  503. func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
  504. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  505. return err
  506. }
  507. return nil
  508. }
  509. func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
  510. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  511. return err
  512. }
  513. return nil
  514. }
  515. func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
  516. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  517. return err
  518. }
  519. return nil
  520. }
  521. func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
  522. if err := binary.Write(f, llm.ByteOrder, b); err != nil {
  523. return err
  524. }
  525. return nil
  526. }
  527. func (llm *GGUFModel) writeString(f *os.File, s string) error {
  528. if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
  529. return err
  530. }
  531. if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
  532. return err
  533. }
  534. return nil
  535. }
  536. func (llm *GGUFModel) Decode(rs io.ReadSeeker) error {
  537. // decode key-values
  538. for i := 0; uint64(i) < llm.NumKV(); i++ {
  539. k, err := llm.readString(rs)
  540. if err != nil {
  541. return err
  542. }
  543. vtype := llm.readU32(rs)
  544. var v any
  545. switch vtype {
  546. case GGUFTypeUint8:
  547. v = llm.readU8(rs)
  548. case GGUFTypeInt8:
  549. v = llm.readI8(rs)
  550. case GGUFTypeUint16:
  551. v = llm.readU16(rs)
  552. case GGUFTypeInt16:
  553. v = llm.readI16(rs)
  554. case GGUFTypeUint32:
  555. v = llm.readU32(rs)
  556. case GGUFTypeInt32:
  557. v = llm.readI32(rs)
  558. case GGUFTypeUint64:
  559. v = llm.readU64(rs)
  560. case GGUFTypeInt64:
  561. v = llm.readI64(rs)
  562. case GGUFTypeFloat32:
  563. v = llm.readF32(rs)
  564. case GGUFTypeFloat64:
  565. v = llm.readF64(rs)
  566. case GGUFTypeBool:
  567. v = llm.readBool(rs)
  568. case GGUFTypeString:
  569. s, err := llm.readString(rs)
  570. if err != nil {
  571. return err
  572. }
  573. v = s
  574. case GGUFTypeArray:
  575. a, err := llm.readArray(rs)
  576. if err != nil {
  577. return err
  578. }
  579. v = a
  580. default:
  581. return fmt.Errorf("invalid type: %d", vtype)
  582. }
  583. llm.KV[k] = v
  584. }
  585. // decode tensors
  586. for i := 0; uint64(i) < llm.NumTensor(); i++ {
  587. name, err := llm.readString(rs)
  588. if err != nil {
  589. return err
  590. }
  591. // dims is the number of dimensions in the tensor
  592. dims := llm.readU32(rs)
  593. shape := [4]uint64{1, 1, 1, 1}
  594. for i := 0; uint32(i) < dims; i++ {
  595. shape[i] = llm.readU64(rs)
  596. }
  597. tensor := Tensor{
  598. Name: name,
  599. Kind: llm.readU32(rs),
  600. Offset: llm.readU64(rs),
  601. Shape: shape[:],
  602. }
  603. llm.Tensors = append(llm.Tensors, tensor)
  604. llm.parameters += tensor.Parameters()
  605. }
  606. alignment, ok := llm.KV["general.alignment"].(uint32)
  607. if !ok {
  608. alignment = 32
  609. }
  610. offset, err := rs.Seek(0, io.SeekCurrent)
  611. if err != nil {
  612. return err
  613. }
  614. if _, err := rs.Seek(int64(alignment)-offset%int64(alignment), io.SeekCurrent); err != nil {
  615. return err
  616. }
  617. for _, tensor := range llm.Tensors {
  618. padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  619. if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
  620. return err
  621. }
  622. }
  623. return nil
  624. }
  625. func (llm *GGUFModel) NumLayers() uint32 {
  626. value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
  627. if !exists {
  628. return 0
  629. }
  630. return value.(uint32)
  631. }
  632. func (llm *GGUFModel) NumHead() uint32 {
  633. value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
  634. if !exists {
  635. return 0
  636. }
  637. return value.(uint32)
  638. }
  639. func (llm *GGUFModel) NumEmbed() uint32 {
  640. value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
  641. if !exists {
  642. return 0
  643. }
  644. return value.(uint32)
  645. }
  646. func (llm *GGUFModel) NumHeadKv() uint32 {
  647. value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
  648. if !exists {
  649. return 0
  650. }
  651. return value.(uint32)
  652. }
  653. func (llm *GGUFModel) NumCtx() uint32 {
  654. value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
  655. if !exists {
  656. return 0
  657. }
  658. return value.(uint32)
  659. }
  660. func (llm *GGUFModel) NumGQA() uint32 {
  661. numHeadKv := llm.NumHeadKv()
  662. if numHeadKv == 0 {
  663. return 0
  664. }
  665. return llm.NumHead() / numHeadKv
  666. }
  667. func (llm GGUFModel) readU8(r io.Reader) uint8 {
  668. var u8 uint8
  669. binary.Read(r, llm.ByteOrder, &u8)
  670. return u8
  671. }
  672. func (llm GGUFModel) readI8(r io.Reader) int8 {
  673. var i8 int8
  674. binary.Read(r, llm.ByteOrder, &i8)
  675. return i8
  676. }
  677. func (llm GGUFModel) readU16(r io.Reader) uint16 {
  678. var u16 uint16
  679. binary.Read(r, llm.ByteOrder, &u16)
  680. return u16
  681. }
  682. func (llm GGUFModel) readI16(r io.Reader) int16 {
  683. var i16 int16
  684. binary.Read(r, llm.ByteOrder, &i16)
  685. return i16
  686. }
  687. func (llm GGUFModel) readU32(r io.Reader) uint32 {
  688. var u32 uint32
  689. binary.Read(r, llm.ByteOrder, &u32)
  690. return u32
  691. }
  692. func (llm GGUFModel) readI32(r io.Reader) int32 {
  693. var i32 int32
  694. binary.Read(r, llm.ByteOrder, &i32)
  695. return i32
  696. }
  697. func (llm GGUFModel) readU64(r io.Reader) uint64 {
  698. var u64 uint64
  699. binary.Read(r, llm.ByteOrder, &u64)
  700. return u64
  701. }
  702. func (llm GGUFModel) readI64(r io.Reader) int64 {
  703. var i64 int64
  704. binary.Read(r, llm.ByteOrder, &i64)
  705. return i64
  706. }
  707. func (llm GGUFModel) readF32(r io.Reader) float32 {
  708. var f32 float32
  709. binary.Read(r, llm.ByteOrder, &f32)
  710. return f32
  711. }
  712. func (llm GGUFModel) readF64(r io.Reader) float64 {
  713. var f64 float64
  714. binary.Read(r, llm.ByteOrder, &f64)
  715. return f64
  716. }
  717. func (llm GGUFModel) readBool(r io.Reader) bool {
  718. var b bool
  719. binary.Read(r, llm.ByteOrder, &b)
  720. return b
  721. }
  722. func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
  723. var nameLength uint32
  724. binary.Read(r, llm.ByteOrder, &nameLength)
  725. var b bytes.Buffer
  726. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  727. return "", err
  728. }
  729. // gguf v1 strings are null-terminated
  730. b.Truncate(b.Len() - 1)
  731. return b.String(), nil
  732. }
  733. func (llm GGUFModel) readString(r io.Reader) (string, error) {
  734. if llm.Version == 1 {
  735. return llm.readStringV1(r)
  736. }
  737. var nameLength uint64
  738. binary.Read(r, llm.ByteOrder, &nameLength)
  739. var b bytes.Buffer
  740. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  741. return "", err
  742. }
  743. return b.String(), nil
  744. }
  745. func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
  746. atype := llm.readU32(r)
  747. n := llm.readU32(r)
  748. for i := 0; uint32(i) < n; i++ {
  749. switch atype {
  750. case GGUFTypeUint8:
  751. arr = append(arr, llm.readU8(r))
  752. case GGUFTypeInt8:
  753. arr = append(arr, llm.readI8(r))
  754. case GGUFTypeUint16:
  755. arr = append(arr, llm.readU16(r))
  756. case GGUFTypeInt16:
  757. arr = append(arr, llm.readI16(r))
  758. case GGUFTypeUint32:
  759. arr = append(arr, llm.readU32(r))
  760. case GGUFTypeInt32:
  761. arr = append(arr, llm.readI32(r))
  762. case GGUFTypeFloat32:
  763. arr = append(arr, llm.readF32(r))
  764. case GGUFTypeBool:
  765. arr = append(arr, llm.readBool(r))
  766. case GGUFTypeString:
  767. s, err := llm.readStringV1(r)
  768. if err != nil {
  769. return nil, err
  770. }
  771. arr = append(arr, s)
  772. default:
  773. return nil, fmt.Errorf("invalid array type: %d", atype)
  774. }
  775. }
  776. return
  777. }
  778. func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
  779. if llm.Version == 1 {
  780. return llm.readArrayV1(r)
  781. }
  782. atype := llm.readU32(r)
  783. n := llm.readU64(r)
  784. for i := 0; uint64(i) < n; i++ {
  785. switch atype {
  786. case GGUFTypeUint8:
  787. arr = append(arr, llm.readU8(r))
  788. case GGUFTypeInt8:
  789. arr = append(arr, llm.readI8(r))
  790. case GGUFTypeUint16:
  791. arr = append(arr, llm.readU16(r))
  792. case GGUFTypeInt16:
  793. arr = append(arr, llm.readI16(r))
  794. case GGUFTypeUint32:
  795. arr = append(arr, llm.readU32(r))
  796. case GGUFTypeInt32:
  797. arr = append(arr, llm.readI32(r))
  798. case GGUFTypeUint64:
  799. arr = append(arr, llm.readU64(r))
  800. case GGUFTypeInt64:
  801. arr = append(arr, llm.readI64(r))
  802. case GGUFTypeFloat32:
  803. arr = append(arr, llm.readF32(r))
  804. case GGUFTypeFloat64:
  805. arr = append(arr, llm.readF64(r))
  806. case GGUFTypeBool:
  807. arr = append(arr, llm.readBool(r))
  808. case GGUFTypeString:
  809. s, err := llm.readString(r)
  810. if err != nil {
  811. return nil, err
  812. }
  813. arr = append(arr, s)
  814. default:
  815. return nil, fmt.Errorf("invalid array type: %d", atype)
  816. }
  817. }
  818. return
  819. }