gguf.go 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "log/slog"
  8. "os"
  9. "regexp"
  10. "github.com/d4l3k/go-bfloat16"
  11. "github.com/pdevine/tensor"
  12. "github.com/pdevine/tensor/native"
  13. "github.com/x448/float16"
  14. "github.com/jmorganca/ollama/format"
  15. )
  // ContainerGGUF holds the fixed-size header fields of a GGUF file: the byte
  // order used for every read and write, the format version, and the tensor and
  // key-value counts.
  type ContainerGGUF struct {
  	ByteOrder binary.ByteOrder

  	Version uint32

  	// V1 holds the 32-bit counts read by Decode when Version is 1.
  	V1 struct {
  		NumTensor, NumKV uint32
  	}

  	// V2 holds the 64-bit counts read by Decode for every version other than 1.
  	V2 struct {
  		NumTensor, NumKV uint64
  	}

  	// V3 holds the counts that GGUFModel.Encode writes into the header.
  	V3 struct {
  		NumTensor, NumKV uint64
  	}
  }

  // Name returns the identifier of this container format, "gguf".
  func (c *ContainerGGUF) Name() string {
  	const name = "gguf"
  	return name
  }
  35. func (c *ContainerGGUF) Decode(rso *readSeekOffset) (model, error) {
  36. binary.Read(rso, c.ByteOrder, &c.Version)
  37. switch c.Version {
  38. case 1:
  39. binary.Read(rso, c.ByteOrder, &c.V1)
  40. default:
  41. binary.Read(rso, c.ByteOrder, &c.V2)
  42. }
  43. model := NewGGUFModel(c)
  44. if err := model.Decode(rso); err != nil {
  45. return nil, err
  46. }
  47. return model, nil
  48. }
  // GGUFToken* are token type values, numbered from 1 (0 is left unused).
  // NOTE(review): presumably the values stored under
  // "tokenizer.ggml.token_type" — confirm against the tokenizer code.
  const (
  	GGUFTokenNormal uint32 = iota + 1
  	GGUFTokenUnknown
  	GGUFTokenControl
  	GGUFTokenUserDefined
  	GGUFTokenUnused
  	GGUFTokenByte
  )
  // GGUFType* are the value type tags that precede every key-value payload and
  // every array element list in a GGUF file. The numeric values are part of the
  // on-disk format, so they are spelled out explicitly.
  const (
  	GGUFTypeUint8   uint32 = 0
  	GGUFTypeInt8    uint32 = 1
  	GGUFTypeUint16  uint32 = 2
  	GGUFTypeInt16   uint32 = 3
  	GGUFTypeUint32  uint32 = 4
  	GGUFTypeInt32   uint32 = 5
  	GGUFTypeFloat32 uint32 = 6
  	GGUFTypeBool    uint32 = 7
  	GGUFTypeString  uint32 = 8
  	GGUFTypeArray   uint32 = 9
  	GGUFTypeUint64  uint32 = 10
  	GGUFTypeInt64   uint32 = 11
  	GGUFTypeFloat64 uint32 = 12
  )
  // KV maps GGUF metadata keys (for example "general.architecture") to their
  // decoded values.
  type KV map[string]any
  // Tensor is the metadata of a single tensor: its name, element kind, offset
  // and shape, plus the location of its source data used when encoding.
  type Tensor struct {
  	Name   string
  	Kind   uint32
  	Offset uint64

  	// Shape holds the number of elements in each dimension.
  	Shape [4]uint64

  	FileName      string
  	OffsetPadding uint64
  	FileOffsets   []uint64
  }

  // BlockSize returns the number of elements stored per block for the tensor's
  // kind: 1 for kinds 0 and 1, 32 for kinds 2 through 9, and 256 otherwise.
  func (t Tensor) BlockSize() uint64 {
  	if t.Kind < 2 {
  		return 1
  	}
  	if t.Kind < 10 {
  		return 32
  	}
  	return 256
  }

  // TypeSize returns the number of bytes occupied by one block of the tensor's
  // kind, or 0 for a kind that is not listed.
  func (t Tensor) TypeSize() uint64 {
  	const (
  		qk  = 32  // block size of kinds 2 through 9
  		qkK = 256 // block size of kinds 10 and above
  	)

  	// Indexed by kind; kinds without an entry (4 and 5) stay 0.
  	sizes := [...]uint64{
  		0:  4,                             // FP32
  		1:  2,                             // FP16
  		2:  2 + qk/2,                      // Q4_0
  		3:  2 + 2 + qk/2,                  // Q4_1
  		6:  2 + 4 + qk/2,                  // Q5_0
  		7:  2 + 2 + 4 + qk/2,              // Q5_1
  		8:  2 + qk,                        // Q8_0
  		9:  4 + 4 + qk,                    // Q8_1
  		10: qkK/16 + qkK/4 + 2 + 2,        // Q2_K
  		11: qkK/8 + qkK/4 + 12 + 2,        // Q3_K
  		12: 2 + 2 + 12 + qkK/2,            // Q4_K
  		13: 2 + 2 + 12 + qkK/8 + qkK/2,    // Q5_K
  		14: qkK/2 + qkK/4 + qkK/16 + 2,    // Q6_K
  		15: 2 + qkK + 2*qkK/16,            // Q8_K
  		16: 2 + 2*qkK/8,                   // IQ2_XXS
  		17: 2 + 2*qkK/8 + qkK/32,          // IQ2_XS
  		18: 2 + 3*qkK/8,                   // IQ3_XXS
  	}

  	if t.Kind >= uint32(len(sizes)) {
  		return 0
  	}
  	return sizes[t.Kind]
  }

  // Parameters returns the number of elements in the tensor, i.e. the product
  // of all four shape entries.
  func (t Tensor) Parameters() uint64 {
  	count := uint64(1)
  	for _, dim := range t.Shape {
  		count *= dim
  	}
  	return count
  }

  // Size returns the tensor's data size in bytes.
  func (t Tensor) Size() uint64 {
  	totalBytes := t.Parameters() * t.TypeSize()
  	return totalBytes / t.BlockSize()
  }
  141. func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
  142. n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
  143. origShape := n.Shape().Clone()
  144. // reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
  145. if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
  146. return []uint16{}, err
  147. }
  148. if err := n.T(0, 2, 1, 3); err != nil {
  149. return []uint16{}, err
  150. }
  151. if err := n.Reshape(origShape...); err != nil {
  152. return []uint16{}, err
  153. }
  154. if err := n.Transpose(); err != nil {
  155. return []uint16{}, err
  156. }
  157. newN, err := native.SelectU16(n, 1)
  158. if err != nil {
  159. return []uint16{}, err
  160. }
  161. var fullTensor []uint16
  162. for _, v := range newN {
  163. fullTensor = append(fullTensor, v...)
  164. }
  165. return fullTensor, nil
  166. }
  167. type GGUFModel struct {
  168. *ContainerGGUF
  169. KV
  170. Tensors []Tensor
  171. parameters uint64
  172. }
  173. func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
  174. return &GGUFModel{
  175. ContainerGGUF: container,
  176. KV: make(KV),
  177. }
  178. }
  179. func (llm *GGUFModel) NumTensor() uint64 {
  180. if llm.Version == 1 {
  181. return uint64(llm.V1.NumTensor)
  182. }
  183. return llm.V2.NumTensor
  184. }
  185. func (llm *GGUFModel) NumKV() uint64 {
  186. if llm.Version == 1 {
  187. return uint64(llm.V1.NumKV)
  188. }
  189. return llm.V2.NumKV
  190. }
  191. func (llm *GGUFModel) ModelFamily() string {
  192. if t, ok := llm.KV["general.architecture"].(string); ok {
  193. return t
  194. }
  195. return "unknown"
  196. }
  197. func (llm *GGUFModel) ModelType() string {
  198. if llm.parameters > 0 {
  199. return format.HumanNumber(llm.parameters)
  200. }
  201. return "unknown"
  202. }
  203. func (llm *GGUFModel) FileType() string {
  204. if t, ok := llm.KV["general.file_type"].(uint32); ok {
  205. return fileType(t)
  206. }
  207. return "unknown"
  208. }
  209. func (llm *GGUFModel) Encode(f *os.File) error {
  210. // this mimics the order of the llama.cpp convert script
  211. kOrder := []string{
  212. "general.architecture",
  213. "general.name",
  214. "llama.context_length",
  215. "llama.embedding_length",
  216. "llama.block_count",
  217. "llama.feed_forward_length",
  218. "llama.rope.dimension_count",
  219. "llama.attention.head_count",
  220. "llama.attention.head_count_kv",
  221. "llama.attention.layer_norm_rms_epsilon",
  222. "llama.rope.freq_base",
  223. "general.file_type",
  224. "tokenizer.ggml.model",
  225. "tokenizer.ggml.tokens",
  226. "tokenizer.ggml.scores",
  227. "tokenizer.ggml.token_type",
  228. "tokenizer.ggml.bos_token_id",
  229. "tokenizer.ggml.eos_token_id",
  230. "tokenizer.ggml.unknown_token_id",
  231. "tokenizer.ggml.add_bos_token",
  232. "tokenizer.ggml.add_eos_token",
  233. "tokenizer.chat_template",
  234. }
  235. if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
  236. return err
  237. }
  238. if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
  239. return err
  240. }
  241. if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
  242. return err
  243. }
  244. if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
  245. return err
  246. }
  247. for _, k := range kOrder {
  248. val, ok := llm.KV[k]
  249. if !ok {
  250. continue
  251. }
  252. if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
  253. return err
  254. }
  255. if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
  256. return err
  257. }
  258. switch v := val.(type) {
  259. case uint32:
  260. if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
  261. return err
  262. }
  263. if err := llm.writeUint32(f, v); err != nil {
  264. return err
  265. }
  266. case float32:
  267. if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
  268. return err
  269. }
  270. if err := llm.writeF32(f, v); err != nil {
  271. return err
  272. }
  273. case bool:
  274. if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
  275. return err
  276. }
  277. if err := llm.writeBool(f, v); err != nil {
  278. return err
  279. }
  280. case string:
  281. if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
  282. return err
  283. }
  284. if err := llm.writeString(f, v); err != nil {
  285. return err
  286. }
  287. case []int32:
  288. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  289. return err
  290. }
  291. if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
  292. return err
  293. }
  294. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  295. return err
  296. }
  297. for _, i := range v {
  298. if err := llm.writeInt32(f, i); err != nil {
  299. return err
  300. }
  301. }
  302. case []uint32:
  303. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  304. return err
  305. }
  306. if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
  307. return err
  308. }
  309. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  310. return err
  311. }
  312. for _, i := range v {
  313. if err := llm.writeUint32(f, i); err != nil {
  314. return err
  315. }
  316. }
  317. case []float32:
  318. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  319. return err
  320. }
  321. if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
  322. return err
  323. }
  324. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  325. return err
  326. }
  327. for _, fl := range v {
  328. if err := llm.writeF32(f, fl); err != nil {
  329. return err
  330. }
  331. }
  332. case []string:
  333. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  334. return err
  335. }
  336. if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
  337. return err
  338. }
  339. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  340. return err
  341. }
  342. for _, s := range v {
  343. if err := llm.writeString(f, s); err != nil {
  344. return err
  345. }
  346. }
  347. }
  348. }
  349. // write layer metadata
  350. for _, t := range llm.Tensors {
  351. if err := llm.writeString(f, t.Name); err != nil {
  352. return err
  353. }
  354. // the dimensions of the tensor
  355. dims := 1
  356. if t.Shape[1] > 0 {
  357. dims = 2
  358. }
  359. if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
  360. return err
  361. }
  362. for i := 0; i < dims; i++ {
  363. if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
  364. return err
  365. }
  366. }
  367. if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
  368. return err
  369. }
  370. if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
  371. return err
  372. }
  373. }
  374. offset, terr := f.Seek(0, io.SeekCurrent)
  375. if terr != nil {
  376. return terr
  377. }
  378. slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
  379. if err := llm.writePadding(f, 32); err != nil {
  380. return err
  381. }
  382. var dataFile *os.File
  383. var currentFile string
  384. var err error
  385. for _, t := range llm.Tensors {
  386. if currentFile != t.FileName {
  387. if f != nil {
  388. dataFile.Close()
  389. }
  390. currentFile = t.FileName
  391. dataFile, err = os.Open(t.FileName)
  392. if err != nil {
  393. fmt.Println(err)
  394. return err
  395. }
  396. }
  397. dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), 0)
  398. pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
  399. re, err := regexp.Compile(pattern)
  400. if err != nil {
  401. return err
  402. }
  403. matches := re.FindAllStringSubmatch(t.Name, -1)
  404. if len(matches) > 0 {
  405. layerSize := t.FileOffsets[1] - t.FileOffsets[0]
  406. var err error
  407. tData := make([]uint16, layerSize/2)
  408. if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
  409. return err
  410. }
  411. layerType := matches[0][re.SubexpIndex("layer")]
  412. var heads uint32
  413. switch layerType {
  414. case "q":
  415. heads = llm.KV["llama.attention.head_count"].(uint32)
  416. case "k":
  417. heads = llm.KV["llama.attention.head_count_kv"].(uint32)
  418. if heads == 0 {
  419. heads = llm.KV["llama.attention.head_count"].(uint32)
  420. }
  421. }
  422. tData, err = t.Repack(tData, int(heads))
  423. if err != nil {
  424. return err
  425. }
  426. var buf []byte
  427. for _, n := range tData {
  428. buf = binary.LittleEndian.AppendUint16(buf, n)
  429. }
  430. tempBuf := make([]uint16, len(tData))
  431. tDataF32 := bfloat16.DecodeFloat32(buf)
  432. for cnt, v := range tDataF32 {
  433. tDataF16 := float16.Fromfloat32(v)
  434. tempBuf[cnt] = uint16(tDataF16)
  435. }
  436. if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
  437. return err
  438. }
  439. if err := llm.writePadding(f, 32); err != nil {
  440. return err
  441. }
  442. continue
  443. }
  444. remaining := t.FileOffsets[1] - t.FileOffsets[0]
  445. bufSize := uint64(10240)
  446. var finished bool
  447. for {
  448. data := make([]byte, min(bufSize, remaining))
  449. b, err := io.ReadFull(dataFile, data)
  450. remaining -= uint64(b)
  451. if err == io.EOF || remaining <= 0 {
  452. finished = true
  453. } else if err != nil {
  454. return err
  455. }
  456. // convert bfloat16 -> ieee float32
  457. tDataF32 := bfloat16.DecodeFloat32(data)
  458. switch t.Kind {
  459. case 0:
  460. if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
  461. return err
  462. }
  463. case 1:
  464. // convert float32 -> float16
  465. tempBuf := make([]uint16, len(data)/2)
  466. for cnt, v := range tDataF32 {
  467. tDataF16 := float16.Fromfloat32(v)
  468. tempBuf[cnt] = uint16(tDataF16)
  469. }
  470. if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
  471. return err
  472. }
  473. }
  474. if finished {
  475. break
  476. }
  477. }
  478. if err := llm.writePadding(f, 32); err != nil {
  479. return err
  480. }
  481. }
  482. f.Close()
  483. return nil
  484. }
  485. func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
  486. // gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
  487. offset, err := f.Seek(0, io.SeekCurrent)
  488. if err != nil {
  489. return err
  490. }
  491. padding := ((offset + align - 1) / align) * align
  492. buf := make([]byte, padding-offset)
  493. if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
  494. return err
  495. }
  496. return nil
  497. }
  498. func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
  499. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  500. return err
  501. }
  502. return nil
  503. }
  504. func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
  505. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  506. return err
  507. }
  508. return nil
  509. }
  510. func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
  511. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  512. return err
  513. }
  514. return nil
  515. }
  516. func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
  517. if err := binary.Write(f, llm.ByteOrder, b); err != nil {
  518. return err
  519. }
  520. return nil
  521. }
  522. func (llm *GGUFModel) writeString(f *os.File, s string) error {
  523. if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
  524. return err
  525. }
  526. if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
  527. return err
  528. }
  529. return nil
  530. }
  531. func (llm *GGUFModel) Decode(rso *readSeekOffset) error {
  532. // decode key-values
  533. for i := 0; uint64(i) < llm.NumKV(); i++ {
  534. k, err := llm.readString(rso)
  535. if err != nil {
  536. return err
  537. }
  538. vtype := llm.readU32(rso)
  539. var v any
  540. switch vtype {
  541. case GGUFTypeUint8:
  542. v = llm.readU8(rso)
  543. case GGUFTypeInt8:
  544. v = llm.readI8(rso)
  545. case GGUFTypeUint16:
  546. v = llm.readU16(rso)
  547. case GGUFTypeInt16:
  548. v = llm.readI16(rso)
  549. case GGUFTypeUint32:
  550. v = llm.readU32(rso)
  551. case GGUFTypeInt32:
  552. v = llm.readI32(rso)
  553. case GGUFTypeUint64:
  554. v = llm.readU64(rso)
  555. case GGUFTypeInt64:
  556. v = llm.readI64(rso)
  557. case GGUFTypeFloat32:
  558. v = llm.readF32(rso)
  559. case GGUFTypeFloat64:
  560. v = llm.readF64(rso)
  561. case GGUFTypeBool:
  562. v = llm.readBool(rso)
  563. case GGUFTypeString:
  564. s, err := llm.readString(rso)
  565. if err != nil {
  566. return err
  567. }
  568. v = s
  569. case GGUFTypeArray:
  570. a, err := llm.readArray(rso)
  571. if err != nil {
  572. return err
  573. }
  574. v = a
  575. default:
  576. return fmt.Errorf("invalid type: %d", vtype)
  577. }
  578. llm.KV[k] = v
  579. }
  580. // decode tensors
  581. for i := 0; uint64(i) < llm.NumTensor(); i++ {
  582. name, err := llm.readString(rso)
  583. if err != nil {
  584. return err
  585. }
  586. // dims is the number of dimensions in the tensor
  587. dims := llm.readU32(rso)
  588. shape := [4]uint64{1, 1, 1, 1}
  589. for i := 0; uint32(i) < dims; i++ {
  590. shape[i] = llm.readU64(rso)
  591. }
  592. tensor := Tensor{
  593. Name: name,
  594. Kind: llm.readU32(rso),
  595. Offset: llm.readU64(rso),
  596. Shape: shape,
  597. }
  598. llm.Tensors = append(llm.Tensors, tensor)
  599. llm.parameters += tensor.Parameters()
  600. }
  601. alignment, ok := llm.KV["general.alignment"].(uint32)
  602. if !ok {
  603. alignment = 32
  604. }
  605. rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
  606. for _, tensor := range llm.Tensors {
  607. padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  608. rso.Seek(padded, io.SeekCurrent)
  609. }
  610. return nil
  611. }
  612. func (llm *GGUFModel) NumLayers() uint32 {
  613. value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
  614. if !exists {
  615. return 0
  616. }
  617. return value.(uint32)
  618. }
  619. func (llm *GGUFModel) NumHead() uint32 {
  620. value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
  621. if !exists {
  622. return 0
  623. }
  624. return value.(uint32)
  625. }
  626. func (llm *GGUFModel) NumEmbed() uint32 {
  627. value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
  628. if !exists {
  629. return 0
  630. }
  631. return value.(uint32)
  632. }
  633. func (llm *GGUFModel) NumHeadKv() uint32 {
  634. value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
  635. if !exists {
  636. return 0
  637. }
  638. return value.(uint32)
  639. }
  640. func (llm *GGUFModel) NumCtx() uint32 {
  641. value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
  642. if !exists {
  643. return 0
  644. }
  645. return value.(uint32)
  646. }
  647. func (llm *GGUFModel) NumGQA() uint32 {
  648. numHeadKv := llm.NumHeadKv()
  649. if numHeadKv == 0 {
  650. return 0
  651. }
  652. return llm.NumHead() / numHeadKv
  653. }
  654. func (llm GGUFModel) readU8(r io.Reader) uint8 {
  655. var u8 uint8
  656. binary.Read(r, llm.ByteOrder, &u8)
  657. return u8
  658. }
  659. func (llm GGUFModel) readI8(r io.Reader) int8 {
  660. var i8 int8
  661. binary.Read(r, llm.ByteOrder, &i8)
  662. return i8
  663. }
  664. func (llm GGUFModel) readU16(r io.Reader) uint16 {
  665. var u16 uint16
  666. binary.Read(r, llm.ByteOrder, &u16)
  667. return u16
  668. }
  669. func (llm GGUFModel) readI16(r io.Reader) int16 {
  670. var i16 int16
  671. binary.Read(r, llm.ByteOrder, &i16)
  672. return i16
  673. }
  674. func (llm GGUFModel) readU32(r io.Reader) uint32 {
  675. var u32 uint32
  676. binary.Read(r, llm.ByteOrder, &u32)
  677. return u32
  678. }
  679. func (llm GGUFModel) readI32(r io.Reader) int32 {
  680. var i32 int32
  681. binary.Read(r, llm.ByteOrder, &i32)
  682. return i32
  683. }
  684. func (llm GGUFModel) readU64(r io.Reader) uint64 {
  685. var u64 uint64
  686. binary.Read(r, llm.ByteOrder, &u64)
  687. return u64
  688. }
  689. func (llm GGUFModel) readI64(r io.Reader) int64 {
  690. var i64 int64
  691. binary.Read(r, llm.ByteOrder, &i64)
  692. return i64
  693. }
  694. func (llm GGUFModel) readF32(r io.Reader) float32 {
  695. var f32 float32
  696. binary.Read(r, llm.ByteOrder, &f32)
  697. return f32
  698. }
  699. func (llm GGUFModel) readF64(r io.Reader) float64 {
  700. var f64 float64
  701. binary.Read(r, llm.ByteOrder, &f64)
  702. return f64
  703. }
  704. func (llm GGUFModel) readBool(r io.Reader) bool {
  705. var b bool
  706. binary.Read(r, llm.ByteOrder, &b)
  707. return b
  708. }
  709. func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
  710. var nameLength uint32
  711. binary.Read(r, llm.ByteOrder, &nameLength)
  712. var b bytes.Buffer
  713. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  714. return "", err
  715. }
  716. // gguf v1 strings are null-terminated
  717. b.Truncate(b.Len() - 1)
  718. return b.String(), nil
  719. }
  720. func (llm GGUFModel) readString(r io.Reader) (string, error) {
  721. if llm.Version == 1 {
  722. return llm.readStringV1(r)
  723. }
  724. var nameLength uint64
  725. binary.Read(r, llm.ByteOrder, &nameLength)
  726. var b bytes.Buffer
  727. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  728. return "", err
  729. }
  730. return b.String(), nil
  731. }
  732. func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
  733. atype := llm.readU32(r)
  734. n := llm.readU32(r)
  735. for i := 0; uint32(i) < n; i++ {
  736. switch atype {
  737. case GGUFTypeUint8:
  738. arr = append(arr, llm.readU8(r))
  739. case GGUFTypeInt8:
  740. arr = append(arr, llm.readI8(r))
  741. case GGUFTypeUint16:
  742. arr = append(arr, llm.readU16(r))
  743. case GGUFTypeInt16:
  744. arr = append(arr, llm.readI16(r))
  745. case GGUFTypeUint32:
  746. arr = append(arr, llm.readU32(r))
  747. case GGUFTypeInt32:
  748. arr = append(arr, llm.readI32(r))
  749. case GGUFTypeFloat32:
  750. arr = append(arr, llm.readF32(r))
  751. case GGUFTypeBool:
  752. arr = append(arr, llm.readBool(r))
  753. case GGUFTypeString:
  754. s, err := llm.readStringV1(r)
  755. if err != nil {
  756. return nil, err
  757. }
  758. arr = append(arr, s)
  759. default:
  760. return nil, fmt.Errorf("invalid array type: %d", atype)
  761. }
  762. }
  763. return
  764. }
  765. func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
  766. if llm.Version == 1 {
  767. return llm.readArrayV1(r)
  768. }
  769. atype := llm.readU32(r)
  770. n := llm.readU64(r)
  771. for i := 0; uint64(i) < n; i++ {
  772. switch atype {
  773. case GGUFTypeUint8:
  774. arr = append(arr, llm.readU8(r))
  775. case GGUFTypeInt8:
  776. arr = append(arr, llm.readI8(r))
  777. case GGUFTypeUint16:
  778. arr = append(arr, llm.readU16(r))
  779. case GGUFTypeInt16:
  780. arr = append(arr, llm.readI16(r))
  781. case GGUFTypeUint32:
  782. arr = append(arr, llm.readU32(r))
  783. case GGUFTypeInt32:
  784. arr = append(arr, llm.readI32(r))
  785. case GGUFTypeUint64:
  786. arr = append(arr, llm.readU64(r))
  787. case GGUFTypeInt64:
  788. arr = append(arr, llm.readI64(r))
  789. case GGUFTypeFloat32:
  790. arr = append(arr, llm.readF32(r))
  791. case GGUFTypeFloat64:
  792. arr = append(arr, llm.readF64(r))
  793. case GGUFTypeBool:
  794. arr = append(arr, llm.readBool(r))
  795. case GGUFTypeString:
  796. s, err := llm.readString(r)
  797. if err != nil {
  798. return nil, err
  799. }
  800. arr = append(arr, s)
  801. default:
  802. return nil, fmt.Errorf("invalid array type: %d", atype)
  803. }
  804. }
  805. return
  806. }