gguf.go 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "log/slog"
  8. "os"
  9. "regexp"
  10. "github.com/d4l3k/go-bfloat16"
  11. "github.com/pdevine/tensor"
  12. "github.com/pdevine/tensor/native"
  13. "github.com/x448/float16"
  14. "github.com/jmorganca/ollama/format"
  15. )
  16. type ContainerGGUF struct {
  17. ByteOrder binary.ByteOrder
  18. Version uint32
  19. V1 struct {
  20. NumTensor uint32
  21. NumKV uint32
  22. }
  23. V2 struct {
  24. NumTensor uint64
  25. NumKV uint64
  26. }
  27. V3 struct {
  28. NumTensor uint64
  29. NumKV uint64
  30. }
  31. }
  32. func (c *ContainerGGUF) Name() string {
  33. return "gguf"
  34. }
  35. func (c *ContainerGGUF) Decode(rso *readSeekOffset) (model, error) {
  36. binary.Read(rso, c.ByteOrder, &c.Version)
  37. switch c.Version {
  38. case 1:
  39. binary.Read(rso, c.ByteOrder, &c.V1)
  40. default:
  41. binary.Read(rso, c.ByteOrder, &c.V2)
  42. }
  43. model := NewGGUFModel(c)
  44. if err := model.Decode(rso); err != nil {
  45. return nil, err
  46. }
  47. return model, nil
  48. }
  49. const (
  50. _ uint32 = iota
  51. GGUFTokenNormal
  52. GGUFTokenUnknown
  53. GGUFTokenControl
  54. GGUFTokenUserDefined
  55. GGUFTokenUnused
  56. GGUFTokenByte
  57. )
  58. const (
  59. GGUFTypeUint8 uint32 = iota
  60. GGUFTypeInt8
  61. GGUFTypeUint16
  62. GGUFTypeInt16
  63. GGUFTypeUint32
  64. GGUFTypeInt32
  65. GGUFTypeFloat32
  66. GGUFTypeBool
  67. GGUFTypeString
  68. GGUFTypeArray
  69. GGUFTypeUint64
  70. GGUFTypeInt64
  71. GGUFTypeFloat64
  72. )
// KV holds the decoded key/value metadata of a model, keyed by metadata name.
// Values keep the Go type that matches their encoded type (uint32, float32,
// string, []any, ...).
type KV map[string]any
// Tensor describes one tensor: its name, element encoding and shape, plus
// where its data lives in the source file that Encode copies from.
type Tensor struct {
	// Name is the tensor name written to the tensor info section.
	Name string
	// Kind is the numeric element type code (0 is FP32, 1 is FP16; higher
	// values are the quantized formats listed in TypeSize).
	Kind uint32
	// Offset is the tensor offset value stored in the tensor info section.
	Offset uint64

	// Shape is the number of elements in each dimension.
	Shape []uint64

	// FileName is the path of the file Encode opens to read this tensor's data.
	FileName string
	// OffsetPadding is added to FileOffsets[0] to find the data in FileName.
	OffsetPadding uint64
	// FileOffsets holds the start ([0]) and end ([1]) byte offsets of the
	// data; their difference is the number of bytes Encode reads.
	FileOffsets []uint64
}
  84. func (t Tensor) BlockSize() uint64 {
  85. switch {
  86. case t.Kind < 2:
  87. return 1
  88. case t.Kind < 10:
  89. return 32
  90. default:
  91. return 256
  92. }
  93. }
  94. func (t Tensor) TypeSize() uint64 {
  95. blockSize := t.BlockSize()
  96. switch t.Kind {
  97. case 0: // FP32
  98. return 4
  99. case 1: // FP16
  100. return 2
  101. case 2: // Q4_0
  102. return 2 + blockSize/2
  103. case 3: // Q4_1
  104. return 2 + 2 + blockSize/2
  105. case 6: // Q5_0
  106. return 2 + 4 + blockSize/2
  107. case 7: // Q5_1
  108. return 2 + 2 + 4 + blockSize/2
  109. case 8: // Q8_0
  110. return 2 + blockSize
  111. case 9: // Q8_1
  112. return 4 + 4 + blockSize
  113. case 10: // Q2_K
  114. return blockSize/16 + blockSize/4 + 2 + 2
  115. case 11: // Q3_K
  116. return blockSize/8 + blockSize/4 + 12 + 2
  117. case 12: // Q4_K
  118. return 2 + 2 + 12 + blockSize/2
  119. case 13: // Q5_K
  120. return 2 + 2 + 12 + blockSize/8 + blockSize/2
  121. case 14: // Q6_K
  122. return blockSize/2 + blockSize/4 + blockSize/16 + 2
  123. case 15: // Q8_K
  124. return 2 + blockSize + 2*blockSize/16
  125. case 16: // IQ2_XXS
  126. return 2 + 2*blockSize/8
  127. case 17: // IQ2_XS
  128. return 2 + 2*blockSize/8 + blockSize/32
  129. case 18: // IQ3_XXS
  130. return 2 + 3*blockSize/8
  131. default:
  132. return 0
  133. }
  134. }
  135. func (t Tensor) Parameters() uint64 {
  136. var count uint64 = 1
  137. for _, n := range t.Shape {
  138. count *= n
  139. }
  140. return count
  141. }
  142. func (t Tensor) Size() uint64 {
  143. return t.Parameters() * t.TypeSize() / t.BlockSize()
  144. }
  145. func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
  146. n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
  147. origShape := n.Shape().Clone()
  148. // reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
  149. if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
  150. return []uint16{}, err
  151. }
  152. if err := n.T(0, 2, 1, 3); err != nil {
  153. return []uint16{}, err
  154. }
  155. if err := n.Reshape(origShape...); err != nil {
  156. return []uint16{}, err
  157. }
  158. if err := n.Transpose(); err != nil {
  159. return []uint16{}, err
  160. }
  161. newN, err := native.SelectU16(n, 1)
  162. if err != nil {
  163. return []uint16{}, err
  164. }
  165. var fullTensor []uint16
  166. for _, v := range newN {
  167. fullTensor = append(fullTensor, v...)
  168. }
  169. return fullTensor, nil
  170. }
// GGUFModel is a model stored in a GGUF container: the container header, the
// key/value metadata and the list of tensors.
type GGUFModel struct {
	// ContainerGGUF supplies the byte order, version and header counts.
	*ContainerGGUF

	// KV is the decoded (or to-be-encoded) key/value metadata.
	KV

	// Tensors lists the tensors in file order.
	Tensors []Tensor

	// parameters is the sum of Parameters() over all tensors seen by Decode.
	parameters uint64
}
  177. func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
  178. return &GGUFModel{
  179. ContainerGGUF: container,
  180. KV: make(KV),
  181. }
  182. }
  183. func (llm *GGUFModel) NumTensor() uint64 {
  184. if llm.Version == 1 {
  185. return uint64(llm.V1.NumTensor)
  186. }
  187. return llm.V2.NumTensor
  188. }
  189. func (llm *GGUFModel) NumKV() uint64 {
  190. if llm.Version == 1 {
  191. return uint64(llm.V1.NumKV)
  192. }
  193. return llm.V2.NumKV
  194. }
  195. func (llm *GGUFModel) ModelFamily() string {
  196. if t, ok := llm.KV["general.architecture"].(string); ok {
  197. return t
  198. }
  199. return "unknown"
  200. }
  201. func (llm *GGUFModel) ModelType() string {
  202. if llm.parameters > 0 {
  203. return format.HumanNumber(llm.parameters)
  204. }
  205. return "unknown"
  206. }
  207. func (llm *GGUFModel) FileType() string {
  208. if t, ok := llm.KV["general.file_type"].(uint32); ok {
  209. return fileType(t)
  210. }
  211. return "unknown"
  212. }
  213. func (llm *GGUFModel) Encode(f *os.File) error {
  214. // this mimics the order of the llama.cpp convert script
  215. kOrder := []string{
  216. "general.architecture",
  217. "general.name",
  218. "llama.context_length",
  219. "llama.embedding_length",
  220. "llama.block_count",
  221. "llama.feed_forward_length",
  222. "llama.rope.dimension_count",
  223. "llama.attention.head_count",
  224. "llama.attention.head_count_kv",
  225. "llama.attention.layer_norm_rms_epsilon",
  226. "llama.rope.freq_base",
  227. "general.file_type",
  228. "tokenizer.ggml.model",
  229. "tokenizer.ggml.tokens",
  230. "tokenizer.ggml.scores",
  231. "tokenizer.ggml.token_type",
  232. "tokenizer.ggml.bos_token_id",
  233. "tokenizer.ggml.eos_token_id",
  234. "tokenizer.ggml.unknown_token_id",
  235. "tokenizer.ggml.add_bos_token",
  236. "tokenizer.ggml.add_eos_token",
  237. "tokenizer.chat_template",
  238. }
  239. if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
  240. return err
  241. }
  242. if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
  243. return err
  244. }
  245. if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
  246. return err
  247. }
  248. if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
  249. return err
  250. }
  251. for _, k := range kOrder {
  252. val, ok := llm.KV[k]
  253. if !ok {
  254. continue
  255. }
  256. if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
  257. return err
  258. }
  259. if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
  260. return err
  261. }
  262. switch v := val.(type) {
  263. case uint32:
  264. if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
  265. return err
  266. }
  267. if err := llm.writeUint32(f, v); err != nil {
  268. return err
  269. }
  270. case float32:
  271. if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
  272. return err
  273. }
  274. if err := llm.writeF32(f, v); err != nil {
  275. return err
  276. }
  277. case bool:
  278. if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
  279. return err
  280. }
  281. if err := llm.writeBool(f, v); err != nil {
  282. return err
  283. }
  284. case string:
  285. if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
  286. return err
  287. }
  288. if err := llm.writeString(f, v); err != nil {
  289. return err
  290. }
  291. case []int32:
  292. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  293. return err
  294. }
  295. if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
  296. return err
  297. }
  298. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  299. return err
  300. }
  301. for _, i := range v {
  302. if err := llm.writeInt32(f, i); err != nil {
  303. return err
  304. }
  305. }
  306. case []uint32:
  307. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  308. return err
  309. }
  310. if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
  311. return err
  312. }
  313. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  314. return err
  315. }
  316. for _, i := range v {
  317. if err := llm.writeUint32(f, i); err != nil {
  318. return err
  319. }
  320. }
  321. case []float32:
  322. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  323. return err
  324. }
  325. if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
  326. return err
  327. }
  328. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  329. return err
  330. }
  331. for _, fl := range v {
  332. if err := llm.writeF32(f, fl); err != nil {
  333. return err
  334. }
  335. }
  336. case []string:
  337. if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
  338. return err
  339. }
  340. if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
  341. return err
  342. }
  343. if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
  344. return err
  345. }
  346. for _, s := range v {
  347. if err := llm.writeString(f, s); err != nil {
  348. return err
  349. }
  350. }
  351. }
  352. }
  353. // write layer metadata
  354. for _, t := range llm.Tensors {
  355. if err := llm.writeString(f, t.Name); err != nil {
  356. return err
  357. }
  358. // the dimensions of the tensor
  359. dims := 1
  360. if t.Shape[1] > 0 {
  361. dims = 2
  362. }
  363. if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
  364. return err
  365. }
  366. for i := 0; i < dims; i++ {
  367. if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
  368. return err
  369. }
  370. }
  371. if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
  372. return err
  373. }
  374. if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
  375. return err
  376. }
  377. }
  378. offset, terr := f.Seek(0, io.SeekCurrent)
  379. if terr != nil {
  380. return terr
  381. }
  382. slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
  383. if err := llm.writePadding(f, 32); err != nil {
  384. return err
  385. }
  386. var dataFile *os.File
  387. var currentFile string
  388. var err error
  389. for _, t := range llm.Tensors {
  390. if currentFile != t.FileName {
  391. if f != nil {
  392. dataFile.Close()
  393. }
  394. currentFile = t.FileName
  395. dataFile, err = os.Open(t.FileName)
  396. if err != nil {
  397. fmt.Println(err)
  398. return err
  399. }
  400. }
  401. dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), 0)
  402. pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
  403. re, err := regexp.Compile(pattern)
  404. if err != nil {
  405. return err
  406. }
  407. matches := re.FindAllStringSubmatch(t.Name, -1)
  408. if len(matches) > 0 {
  409. layerSize := t.FileOffsets[1] - t.FileOffsets[0]
  410. var err error
  411. tData := make([]uint16, layerSize/2)
  412. if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
  413. return err
  414. }
  415. layerType := matches[0][re.SubexpIndex("layer")]
  416. var heads uint32
  417. switch layerType {
  418. case "q":
  419. heads = llm.KV["llama.attention.head_count"].(uint32)
  420. case "k":
  421. heads = llm.KV["llama.attention.head_count_kv"].(uint32)
  422. if heads == 0 {
  423. heads = llm.KV["llama.attention.head_count"].(uint32)
  424. }
  425. }
  426. tData, err = t.Repack(tData, int(heads))
  427. if err != nil {
  428. return err
  429. }
  430. var buf []byte
  431. for _, n := range tData {
  432. buf = binary.LittleEndian.AppendUint16(buf, n)
  433. }
  434. tempBuf := make([]uint16, len(tData))
  435. tDataF32 := bfloat16.DecodeFloat32(buf)
  436. for cnt, v := range tDataF32 {
  437. tDataF16 := float16.Fromfloat32(v)
  438. tempBuf[cnt] = uint16(tDataF16)
  439. }
  440. if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
  441. return err
  442. }
  443. if err := llm.writePadding(f, 32); err != nil {
  444. return err
  445. }
  446. continue
  447. }
  448. remaining := t.FileOffsets[1] - t.FileOffsets[0]
  449. bufSize := uint64(10240)
  450. var finished bool
  451. for {
  452. data := make([]byte, min(bufSize, remaining))
  453. b, err := io.ReadFull(dataFile, data)
  454. remaining -= uint64(b)
  455. if err == io.EOF || remaining <= 0 {
  456. finished = true
  457. } else if err != nil {
  458. return err
  459. }
  460. // convert bfloat16 -> ieee float32
  461. tDataF32 := bfloat16.DecodeFloat32(data)
  462. switch t.Kind {
  463. case 0:
  464. if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
  465. return err
  466. }
  467. case 1:
  468. // convert float32 -> float16
  469. tempBuf := make([]uint16, len(data)/2)
  470. for cnt, v := range tDataF32 {
  471. tDataF16 := float16.Fromfloat32(v)
  472. tempBuf[cnt] = uint16(tDataF16)
  473. }
  474. if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
  475. return err
  476. }
  477. }
  478. if finished {
  479. break
  480. }
  481. }
  482. if err := llm.writePadding(f, 32); err != nil {
  483. return err
  484. }
  485. }
  486. f.Close()
  487. return nil
  488. }
  489. func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
  490. // gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
  491. offset, err := f.Seek(0, io.SeekCurrent)
  492. if err != nil {
  493. return err
  494. }
  495. padding := ((offset + align - 1) / align) * align
  496. buf := make([]byte, padding-offset)
  497. if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
  498. return err
  499. }
  500. return nil
  501. }
  502. func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
  503. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  504. return err
  505. }
  506. return nil
  507. }
  508. func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
  509. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  510. return err
  511. }
  512. return nil
  513. }
  514. func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
  515. if err := binary.Write(f, llm.ByteOrder, v); err != nil {
  516. return err
  517. }
  518. return nil
  519. }
  520. func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
  521. if err := binary.Write(f, llm.ByteOrder, b); err != nil {
  522. return err
  523. }
  524. return nil
  525. }
  526. func (llm *GGUFModel) writeString(f *os.File, s string) error {
  527. if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
  528. return err
  529. }
  530. if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
  531. return err
  532. }
  533. return nil
  534. }
  535. func (llm *GGUFModel) Decode(rso *readSeekOffset) error {
  536. // decode key-values
  537. for i := 0; uint64(i) < llm.NumKV(); i++ {
  538. k, err := llm.readString(rso)
  539. if err != nil {
  540. return err
  541. }
  542. vtype := llm.readU32(rso)
  543. var v any
  544. switch vtype {
  545. case GGUFTypeUint8:
  546. v = llm.readU8(rso)
  547. case GGUFTypeInt8:
  548. v = llm.readI8(rso)
  549. case GGUFTypeUint16:
  550. v = llm.readU16(rso)
  551. case GGUFTypeInt16:
  552. v = llm.readI16(rso)
  553. case GGUFTypeUint32:
  554. v = llm.readU32(rso)
  555. case GGUFTypeInt32:
  556. v = llm.readI32(rso)
  557. case GGUFTypeUint64:
  558. v = llm.readU64(rso)
  559. case GGUFTypeInt64:
  560. v = llm.readI64(rso)
  561. case GGUFTypeFloat32:
  562. v = llm.readF32(rso)
  563. case GGUFTypeFloat64:
  564. v = llm.readF64(rso)
  565. case GGUFTypeBool:
  566. v = llm.readBool(rso)
  567. case GGUFTypeString:
  568. s, err := llm.readString(rso)
  569. if err != nil {
  570. return err
  571. }
  572. v = s
  573. case GGUFTypeArray:
  574. a, err := llm.readArray(rso)
  575. if err != nil {
  576. return err
  577. }
  578. v = a
  579. default:
  580. return fmt.Errorf("invalid type: %d", vtype)
  581. }
  582. llm.KV[k] = v
  583. }
  584. // decode tensors
  585. for i := 0; uint64(i) < llm.NumTensor(); i++ {
  586. name, err := llm.readString(rso)
  587. if err != nil {
  588. return err
  589. }
  590. // dims is the number of dimensions in the tensor
  591. dims := llm.readU32(rso)
  592. shape := [4]uint64{1, 1, 1, 1}
  593. for i := 0; uint32(i) < dims; i++ {
  594. shape[i] = llm.readU64(rso)
  595. }
  596. tensor := Tensor{
  597. Name: name,
  598. Kind: llm.readU32(rso),
  599. Offset: llm.readU64(rso),
  600. Shape: shape[:],
  601. }
  602. llm.Tensors = append(llm.Tensors, tensor)
  603. llm.parameters += tensor.Parameters()
  604. }
  605. alignment, ok := llm.KV["general.alignment"].(uint32)
  606. if !ok {
  607. alignment = 32
  608. }
  609. rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
  610. for _, tensor := range llm.Tensors {
  611. padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  612. rso.Seek(padded, io.SeekCurrent)
  613. }
  614. return nil
  615. }
  616. func (llm *GGUFModel) NumLayers() uint32 {
  617. value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
  618. if !exists {
  619. return 0
  620. }
  621. return value.(uint32)
  622. }
  623. func (llm *GGUFModel) NumHead() uint32 {
  624. value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
  625. if !exists {
  626. return 0
  627. }
  628. return value.(uint32)
  629. }
  630. func (llm *GGUFModel) NumEmbed() uint32 {
  631. value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
  632. if !exists {
  633. return 0
  634. }
  635. return value.(uint32)
  636. }
  637. func (llm *GGUFModel) NumHeadKv() uint32 {
  638. value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
  639. if !exists {
  640. return 0
  641. }
  642. return value.(uint32)
  643. }
  644. func (llm *GGUFModel) NumCtx() uint32 {
  645. value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
  646. if !exists {
  647. return 0
  648. }
  649. return value.(uint32)
  650. }
  651. func (llm *GGUFModel) NumGQA() uint32 {
  652. numHeadKv := llm.NumHeadKv()
  653. if numHeadKv == 0 {
  654. return 0
  655. }
  656. return llm.NumHead() / numHeadKv
  657. }
  658. func (llm GGUFModel) readU8(r io.Reader) uint8 {
  659. var u8 uint8
  660. binary.Read(r, llm.ByteOrder, &u8)
  661. return u8
  662. }
  663. func (llm GGUFModel) readI8(r io.Reader) int8 {
  664. var i8 int8
  665. binary.Read(r, llm.ByteOrder, &i8)
  666. return i8
  667. }
  668. func (llm GGUFModel) readU16(r io.Reader) uint16 {
  669. var u16 uint16
  670. binary.Read(r, llm.ByteOrder, &u16)
  671. return u16
  672. }
  673. func (llm GGUFModel) readI16(r io.Reader) int16 {
  674. var i16 int16
  675. binary.Read(r, llm.ByteOrder, &i16)
  676. return i16
  677. }
  678. func (llm GGUFModel) readU32(r io.Reader) uint32 {
  679. var u32 uint32
  680. binary.Read(r, llm.ByteOrder, &u32)
  681. return u32
  682. }
  683. func (llm GGUFModel) readI32(r io.Reader) int32 {
  684. var i32 int32
  685. binary.Read(r, llm.ByteOrder, &i32)
  686. return i32
  687. }
  688. func (llm GGUFModel) readU64(r io.Reader) uint64 {
  689. var u64 uint64
  690. binary.Read(r, llm.ByteOrder, &u64)
  691. return u64
  692. }
  693. func (llm GGUFModel) readI64(r io.Reader) int64 {
  694. var i64 int64
  695. binary.Read(r, llm.ByteOrder, &i64)
  696. return i64
  697. }
  698. func (llm GGUFModel) readF32(r io.Reader) float32 {
  699. var f32 float32
  700. binary.Read(r, llm.ByteOrder, &f32)
  701. return f32
  702. }
  703. func (llm GGUFModel) readF64(r io.Reader) float64 {
  704. var f64 float64
  705. binary.Read(r, llm.ByteOrder, &f64)
  706. return f64
  707. }
  708. func (llm GGUFModel) readBool(r io.Reader) bool {
  709. var b bool
  710. binary.Read(r, llm.ByteOrder, &b)
  711. return b
  712. }
  713. func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
  714. var nameLength uint32
  715. binary.Read(r, llm.ByteOrder, &nameLength)
  716. var b bytes.Buffer
  717. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  718. return "", err
  719. }
  720. // gguf v1 strings are null-terminated
  721. b.Truncate(b.Len() - 1)
  722. return b.String(), nil
  723. }
  724. func (llm GGUFModel) readString(r io.Reader) (string, error) {
  725. if llm.Version == 1 {
  726. return llm.readStringV1(r)
  727. }
  728. var nameLength uint64
  729. binary.Read(r, llm.ByteOrder, &nameLength)
  730. var b bytes.Buffer
  731. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  732. return "", err
  733. }
  734. return b.String(), nil
  735. }
  736. func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
  737. atype := llm.readU32(r)
  738. n := llm.readU32(r)
  739. for i := 0; uint32(i) < n; i++ {
  740. switch atype {
  741. case GGUFTypeUint8:
  742. arr = append(arr, llm.readU8(r))
  743. case GGUFTypeInt8:
  744. arr = append(arr, llm.readI8(r))
  745. case GGUFTypeUint16:
  746. arr = append(arr, llm.readU16(r))
  747. case GGUFTypeInt16:
  748. arr = append(arr, llm.readI16(r))
  749. case GGUFTypeUint32:
  750. arr = append(arr, llm.readU32(r))
  751. case GGUFTypeInt32:
  752. arr = append(arr, llm.readI32(r))
  753. case GGUFTypeFloat32:
  754. arr = append(arr, llm.readF32(r))
  755. case GGUFTypeBool:
  756. arr = append(arr, llm.readBool(r))
  757. case GGUFTypeString:
  758. s, err := llm.readStringV1(r)
  759. if err != nil {
  760. return nil, err
  761. }
  762. arr = append(arr, s)
  763. default:
  764. return nil, fmt.Errorf("invalid array type: %d", atype)
  765. }
  766. }
  767. return
  768. }
  769. func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
  770. if llm.Version == 1 {
  771. return llm.readArrayV1(r)
  772. }
  773. atype := llm.readU32(r)
  774. n := llm.readU64(r)
  775. for i := 0; uint64(i) < n; i++ {
  776. switch atype {
  777. case GGUFTypeUint8:
  778. arr = append(arr, llm.readU8(r))
  779. case GGUFTypeInt8:
  780. arr = append(arr, llm.readI8(r))
  781. case GGUFTypeUint16:
  782. arr = append(arr, llm.readU16(r))
  783. case GGUFTypeInt16:
  784. arr = append(arr, llm.readI16(r))
  785. case GGUFTypeUint32:
  786. arr = append(arr, llm.readU32(r))
  787. case GGUFTypeInt32:
  788. arr = append(arr, llm.readI32(r))
  789. case GGUFTypeUint64:
  790. arr = append(arr, llm.readU64(r))
  791. case GGUFTypeInt64:
  792. arr = append(arr, llm.readI64(r))
  793. case GGUFTypeFloat32:
  794. arr = append(arr, llm.readF32(r))
  795. case GGUFTypeFloat64:
  796. arr = append(arr, llm.readF64(r))
  797. case GGUFTypeBool:
  798. arr = append(arr, llm.readBool(r))
  799. case GGUFTypeString:
  800. s, err := llm.readString(r)
  801. if err != nil {
  802. return nil, err
  803. }
  804. arr = append(arr, s)
  805. default:
  806. return nil, fmt.Errorf("invalid array type: %d", atype)
  807. }
  808. }
  809. return
  810. }