// gguf.go

  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "github.com/jmorganca/ollama/format"
  8. )
  9. type containerGGUF struct {
  10. bo binary.ByteOrder
  11. Version uint32
  12. V1 struct {
  13. NumTensor uint32
  14. NumKV uint32
  15. }
  16. V2 struct {
  17. NumTensor uint64
  18. NumKV uint64
  19. }
  20. }
  21. func (c *containerGGUF) Name() string {
  22. return "gguf"
  23. }
  24. func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
  25. binary.Read(rso, c.bo, &c.Version)
  26. switch c.Version {
  27. case 1:
  28. binary.Read(rso, c.bo, &c.V1)
  29. default:
  30. binary.Read(rso, c.bo, &c.V2)
  31. }
  32. model := newGGUFModel(c)
  33. if err := model.Decode(rso); err != nil {
  34. return nil, err
  35. }
  36. return model, nil
  37. }
  38. const (
  39. ggufTypeUint8 uint32 = iota
  40. ggufTypeInt8
  41. ggufTypeUint16
  42. ggufTypeInt16
  43. ggufTypeUint32
  44. ggufTypeInt32
  45. ggufTypeFloat32
  46. ggufTypeBool
  47. ggufTypeString
  48. ggufTypeArray
  49. ggufTypeUint64
  50. ggufTypeInt64
  51. ggufTypeFloat64
  52. )
  53. type kv map[string]any
  54. type tensor struct {
  55. name string
  56. kind uint32
  57. offset uint64
  58. // shape is the number of elements in each dimension
  59. shape [4]uint64
  60. }
  61. func (t tensor) blockSize() uint64 {
  62. switch {
  63. case t.kind < 2:
  64. return 1
  65. case t.kind < 10:
  66. return 32
  67. default:
  68. return 256
  69. }
  70. }
  71. func (t tensor) typeSize() uint64 {
  72. blockSize := t.blockSize()
  73. switch t.kind {
  74. case 0: // FP32
  75. return 4
  76. case 1: // FP16
  77. return 2
  78. case 2: // Q4_0
  79. return 2 + blockSize/2
  80. case 3: // Q4_1
  81. return 2 + 2 + blockSize/2
  82. case 6: // Q5_0
  83. return 2 + 4 + blockSize/2
  84. case 7: // Q5_1
  85. return 2 + 2 + 4 + blockSize/2
  86. case 8: // Q8_0
  87. return 2 + blockSize
  88. case 9: // Q8_1
  89. return 4 + 4 + blockSize
  90. case 10: // Q2_K
  91. return blockSize/16 + blockSize/4 + 2 + 2
  92. case 11: // Q3_K
  93. return blockSize/8 + blockSize/4 + 12 + 2
  94. case 12: // Q4_K
  95. return 2 + 2 + 12 + blockSize/2
  96. case 13: // Q5_K
  97. return 2 + 2 + 12 + blockSize/8 + blockSize/2
  98. case 14: // Q6_K
  99. return blockSize/2 + blockSize/4 + blockSize/16 + 2
  100. default:
  101. return 0
  102. }
  103. }
  104. func (t tensor) parameters() uint64 {
  105. return t.shape[0] * t.shape[1] * t.shape[2] * t.shape[3]
  106. }
  107. func (t tensor) size() uint64 {
  108. return t.parameters() * t.typeSize() / t.blockSize()
  109. }
  110. type ggufModel struct {
  111. *containerGGUF
  112. kv
  113. tensors []tensor
  114. parameters uint64
  115. }
  116. func newGGUFModel(container *containerGGUF) *ggufModel {
  117. return &ggufModel{
  118. containerGGUF: container,
  119. kv: make(kv),
  120. }
  121. }
  122. func (llm *ggufModel) NumTensor() uint64 {
  123. if llm.Version == 1 {
  124. return uint64(llm.V1.NumTensor)
  125. }
  126. return llm.V2.NumTensor
  127. }
  128. func (llm *ggufModel) NumKV() uint64 {
  129. if llm.Version == 1 {
  130. return uint64(llm.V1.NumKV)
  131. }
  132. return llm.V2.NumKV
  133. }
  134. func (llm *ggufModel) ModelFamily() string {
  135. if t, ok := llm.kv["general.architecture"].(string); ok {
  136. return t
  137. }
  138. return "unknown"
  139. }
  140. func (llm *ggufModel) ModelType() string {
  141. if llm.parameters > 0 {
  142. return format.HumanNumber(llm.parameters)
  143. }
  144. return "unknown"
  145. }
  146. func (llm *ggufModel) FileType() string {
  147. if t, ok := llm.kv["general.file_type"].(uint32); ok {
  148. return fileType(t)
  149. }
  150. return "unknown"
  151. }
  152. func (llm *ggufModel) Decode(rso *readSeekOffset) error {
  153. // decode key-values
  154. for i := 0; uint64(i) < llm.NumKV(); i++ {
  155. k, err := llm.readString(rso)
  156. if err != nil {
  157. return err
  158. }
  159. vtype := llm.readU32(rso)
  160. var v any
  161. switch vtype {
  162. case ggufTypeUint8:
  163. v = llm.readU8(rso)
  164. case ggufTypeInt8:
  165. v = llm.readI8(rso)
  166. case ggufTypeUint16:
  167. v = llm.readU16(rso)
  168. case ggufTypeInt16:
  169. v = llm.readI16(rso)
  170. case ggufTypeUint32:
  171. v = llm.readU32(rso)
  172. case ggufTypeInt32:
  173. v = llm.readI32(rso)
  174. case ggufTypeUint64:
  175. v = llm.readU64(rso)
  176. case ggufTypeInt64:
  177. v = llm.readI64(rso)
  178. case ggufTypeFloat32:
  179. v = llm.readF32(rso)
  180. case ggufTypeFloat64:
  181. v = llm.readF64(rso)
  182. case ggufTypeBool:
  183. v = llm.readBool(rso)
  184. case ggufTypeString:
  185. s, err := llm.readString(rso)
  186. if err != nil {
  187. return err
  188. }
  189. v = s
  190. case ggufTypeArray:
  191. a, err := llm.readArray(rso)
  192. if err != nil {
  193. return err
  194. }
  195. v = a
  196. default:
  197. return fmt.Errorf("invalid type: %d", vtype)
  198. }
  199. llm.kv[k] = v
  200. }
  201. // decode tensors
  202. for i := 0; uint64(i) < llm.NumTensor(); i++ {
  203. name, err := llm.readString(rso)
  204. if err != nil {
  205. return err
  206. }
  207. // dims is the number of dimensions in the tensor
  208. dims := llm.readU32(rso)
  209. shape := [4]uint64{1, 1, 1, 1}
  210. for i := 0; uint32(i) < dims; i++ {
  211. shape[i] = llm.readU64(rso)
  212. }
  213. tensor := tensor{
  214. name: name,
  215. kind: llm.readU32(rso),
  216. offset: llm.readU64(rso),
  217. shape: shape,
  218. }
  219. llm.tensors = append(llm.tensors, tensor)
  220. llm.parameters += tensor.parameters()
  221. }
  222. alignment, ok := llm.kv["general.alignment"].(uint32)
  223. if !ok {
  224. alignment = 32
  225. }
  226. rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
  227. for _, tensor := range llm.tensors {
  228. padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  229. rso.Seek(padded, io.SeekCurrent)
  230. }
  231. return nil
  232. }
  233. func (llm *ggufModel) NumLayers() uint32 {
  234. value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
  235. if !exists {
  236. return 0
  237. }
  238. return value.(uint32)
  239. }
  240. func (llm *ggufModel) NumHead() uint32 {
  241. value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
  242. if !exists {
  243. return 0
  244. }
  245. return value.(uint32)
  246. }
  247. func (llm *ggufModel) NumEmbed() uint32 {
  248. value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
  249. if !exists {
  250. return 0
  251. }
  252. return value.(uint32)
  253. }
  254. func (llm *ggufModel) NumHeadKv() uint32 {
  255. value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
  256. if !exists {
  257. return 0
  258. }
  259. return value.(uint32)
  260. }
  261. func (llm *ggufModel) NumCtx() uint32 {
  262. value, exists := llm.kv[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
  263. if !exists {
  264. return 0
  265. }
  266. return value.(uint32)
  267. }
  268. func (llm *ggufModel) NumGQA() uint32 {
  269. numHeadKv := llm.NumHeadKv()
  270. if numHeadKv == 0 {
  271. return 0
  272. }
  273. return llm.NumHead() / numHeadKv
  274. }
  275. func (llm ggufModel) readU8(r io.Reader) uint8 {
  276. var u8 uint8
  277. binary.Read(r, llm.bo, &u8)
  278. return u8
  279. }
  280. func (llm ggufModel) readI8(r io.Reader) int8 {
  281. var i8 int8
  282. binary.Read(r, llm.bo, &i8)
  283. return i8
  284. }
  285. func (llm ggufModel) readU16(r io.Reader) uint16 {
  286. var u16 uint16
  287. binary.Read(r, llm.bo, &u16)
  288. return u16
  289. }
  290. func (llm ggufModel) readI16(r io.Reader) int16 {
  291. var i16 int16
  292. binary.Read(r, llm.bo, &i16)
  293. return i16
  294. }
  295. func (llm ggufModel) readU32(r io.Reader) uint32 {
  296. var u32 uint32
  297. binary.Read(r, llm.bo, &u32)
  298. return u32
  299. }
  300. func (llm ggufModel) readI32(r io.Reader) int32 {
  301. var i32 int32
  302. binary.Read(r, llm.bo, &i32)
  303. return i32
  304. }
  305. func (llm ggufModel) readU64(r io.Reader) uint64 {
  306. var u64 uint64
  307. binary.Read(r, llm.bo, &u64)
  308. return u64
  309. }
  310. func (llm ggufModel) readI64(r io.Reader) int64 {
  311. var i64 int64
  312. binary.Read(r, llm.bo, &i64)
  313. return i64
  314. }
  315. func (llm ggufModel) readF32(r io.Reader) float32 {
  316. var f32 float32
  317. binary.Read(r, llm.bo, &f32)
  318. return f32
  319. }
  320. func (llm ggufModel) readF64(r io.Reader) float64 {
  321. var f64 float64
  322. binary.Read(r, llm.bo, &f64)
  323. return f64
  324. }
  325. func (llm ggufModel) readBool(r io.Reader) bool {
  326. var b bool
  327. binary.Read(r, llm.bo, &b)
  328. return b
  329. }
  330. func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
  331. var nameLength uint32
  332. binary.Read(r, llm.bo, &nameLength)
  333. var b bytes.Buffer
  334. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  335. return "", err
  336. }
  337. // gguf v1 strings are null-terminated
  338. b.Truncate(b.Len() - 1)
  339. return b.String(), nil
  340. }
  341. func (llm ggufModel) readString(r io.Reader) (string, error) {
  342. if llm.Version == 1 {
  343. return llm.readStringV1(r)
  344. }
  345. var nameLength uint64
  346. binary.Read(r, llm.bo, &nameLength)
  347. var b bytes.Buffer
  348. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  349. return "", err
  350. }
  351. return b.String(), nil
  352. }
  353. func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
  354. atype := llm.readU32(r)
  355. n := llm.readU32(r)
  356. for i := 0; uint32(i) < n; i++ {
  357. switch atype {
  358. case ggufTypeUint8:
  359. arr = append(arr, llm.readU8(r))
  360. case ggufTypeInt8:
  361. arr = append(arr, llm.readI8(r))
  362. case ggufTypeUint16:
  363. arr = append(arr, llm.readU16(r))
  364. case ggufTypeInt16:
  365. arr = append(arr, llm.readI16(r))
  366. case ggufTypeUint32:
  367. arr = append(arr, llm.readU32(r))
  368. case ggufTypeInt32:
  369. arr = append(arr, llm.readI32(r))
  370. case ggufTypeFloat32:
  371. arr = append(arr, llm.readF32(r))
  372. case ggufTypeBool:
  373. arr = append(arr, llm.readBool(r))
  374. case ggufTypeString:
  375. s, err := llm.readStringV1(r)
  376. if err != nil {
  377. return nil, err
  378. }
  379. arr = append(arr, s)
  380. default:
  381. return nil, fmt.Errorf("invalid array type: %d", atype)
  382. }
  383. }
  384. return
  385. }
  386. func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
  387. if llm.Version == 1 {
  388. return llm.readArrayV1(r)
  389. }
  390. atype := llm.readU32(r)
  391. n := llm.readU64(r)
  392. for i := 0; uint64(i) < n; i++ {
  393. switch atype {
  394. case ggufTypeUint8:
  395. arr = append(arr, llm.readU8(r))
  396. case ggufTypeInt8:
  397. arr = append(arr, llm.readI8(r))
  398. case ggufTypeUint16:
  399. arr = append(arr, llm.readU16(r))
  400. case ggufTypeInt16:
  401. arr = append(arr, llm.readI16(r))
  402. case ggufTypeUint32:
  403. arr = append(arr, llm.readU32(r))
  404. case ggufTypeInt32:
  405. arr = append(arr, llm.readI32(r))
  406. case ggufTypeUint64:
  407. arr = append(arr, llm.readU64(r))
  408. case ggufTypeInt64:
  409. arr = append(arr, llm.readI64(r))
  410. case ggufTypeFloat32:
  411. arr = append(arr, llm.readF32(r))
  412. case ggufTypeFloat64:
  413. arr = append(arr, llm.readF64(r))
  414. case ggufTypeBool:
  415. arr = append(arr, llm.readBool(r))
  416. case ggufTypeString:
  417. s, err := llm.readString(r)
  418. if err != nil {
  419. return nil, err
  420. }
  421. arr = append(arr, s)
  422. default:
  423. return nil, fmt.Errorf("invalid array type: %d", atype)
  424. }
  425. }
  426. return
  427. }