gguf.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "github.com/jmorganca/ollama/format"
  8. )
  9. type containerGGUF struct {
  10. bo binary.ByteOrder
  11. Version uint32
  12. V1 struct {
  13. NumTensor uint32
  14. NumKV uint32
  15. }
  16. V2 struct {
  17. NumTensor uint64
  18. NumKV uint64
  19. }
  20. }
  21. func (c *containerGGUF) Name() string {
  22. return "gguf"
  23. }
  24. func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
  25. binary.Read(rso, c.bo, &c.Version)
  26. switch c.Version {
  27. case 1:
  28. binary.Read(rso, c.bo, &c.V1)
  29. default:
  30. binary.Read(rso, c.bo, &c.V2)
  31. }
  32. model := newGGUFModel(c)
  33. if err := model.Decode(rso); err != nil {
  34. return nil, err
  35. }
  36. return model, nil
  37. }
  38. const (
  39. ggufTypeUint8 uint32 = iota
  40. ggufTypeInt8
  41. ggufTypeUint16
  42. ggufTypeInt16
  43. ggufTypeUint32
  44. ggufTypeInt32
  45. ggufTypeFloat32
  46. ggufTypeBool
  47. ggufTypeString
  48. ggufTypeArray
  49. ggufTypeUint64
  50. ggufTypeInt64
  51. ggufTypeFloat64
  52. )
  53. type kv map[string]any
  54. type tensor struct {
  55. name string
  56. kind uint32
  57. offset uint64
  58. size uint64
  59. // shape is the number of elements in each dimension
  60. shape [4]uint64
  61. }
  62. type ggufModel struct {
  63. *containerGGUF
  64. kv
  65. tensors []tensor
  66. parameters uint64
  67. }
  68. func newGGUFModel(container *containerGGUF) *ggufModel {
  69. return &ggufModel{
  70. containerGGUF: container,
  71. kv: make(kv),
  72. }
  73. }
  74. func (llm *ggufModel) NumTensor() uint64 {
  75. if llm.Version == 1 {
  76. return uint64(llm.V1.NumTensor)
  77. }
  78. return llm.V2.NumTensor
  79. }
  80. func (llm *ggufModel) NumKV() uint64 {
  81. if llm.Version == 1 {
  82. return uint64(llm.V1.NumKV)
  83. }
  84. return llm.V2.NumKV
  85. }
  86. func (llm *ggufModel) ModelFamily() string {
  87. if t, ok := llm.kv["general.architecture"].(string); ok {
  88. return t
  89. }
  90. return "unknown"
  91. }
  92. func (llm *ggufModel) ModelType() string {
  93. if llm.parameters > 0 {
  94. return format.HumanNumber(llm.parameters)
  95. }
  96. return "unknown"
  97. }
  98. func (llm *ggufModel) FileType() string {
  99. if t, ok := llm.kv["general.file_type"].(uint32); ok {
  100. return fileType(t)
  101. }
  102. return "unknown"
  103. }
  104. func (llm *ggufModel) Decode(rso *readSeekOffset) error {
  105. // decode key-values
  106. for i := 0; uint64(i) < llm.NumKV(); i++ {
  107. k, err := llm.readString(rso)
  108. if err != nil {
  109. return err
  110. }
  111. vtype := llm.readU32(rso)
  112. var v any
  113. switch vtype {
  114. case ggufTypeUint8:
  115. v = llm.readU8(rso)
  116. case ggufTypeInt8:
  117. v = llm.readI8(rso)
  118. case ggufTypeUint16:
  119. v = llm.readU16(rso)
  120. case ggufTypeInt16:
  121. v = llm.readI16(rso)
  122. case ggufTypeUint32:
  123. v = llm.readU32(rso)
  124. case ggufTypeInt32:
  125. v = llm.readI32(rso)
  126. case ggufTypeUint64:
  127. v = llm.readU64(rso)
  128. case ggufTypeInt64:
  129. v = llm.readI64(rso)
  130. case ggufTypeFloat32:
  131. v = llm.readF32(rso)
  132. case ggufTypeFloat64:
  133. v = llm.readF64(rso)
  134. case ggufTypeBool:
  135. v = llm.readBool(rso)
  136. case ggufTypeString:
  137. s, err := llm.readString(rso)
  138. if err != nil {
  139. return err
  140. }
  141. v = s
  142. case ggufTypeArray:
  143. a, err := llm.readArray(rso)
  144. if err != nil {
  145. return err
  146. }
  147. v = a
  148. default:
  149. return fmt.Errorf("invalid type: %d", vtype)
  150. }
  151. llm.kv[k] = v
  152. }
  153. // decode tensors
  154. for i := 0; uint64(i) < llm.NumTensor(); i++ {
  155. name, err := llm.readString(rso)
  156. if err != nil {
  157. return err
  158. }
  159. // dims is the number of dimensions in the tensor
  160. dims := llm.readU32(rso)
  161. shape := [4]uint64{1, 1, 1, 1}
  162. for i := 0; uint32(i) < dims; i++ {
  163. shape[i] = llm.readU64(rso)
  164. }
  165. kind := llm.readU32(rso)
  166. offset := llm.readU64(rso)
  167. var blockSize uint64
  168. switch {
  169. case kind < 2:
  170. blockSize = 1
  171. case kind < 10:
  172. blockSize = 32
  173. default:
  174. blockSize = 256
  175. }
  176. var typeSize uint64
  177. switch kind {
  178. case 0: // FP32
  179. typeSize = 4
  180. case 1: // FP16
  181. typeSize = 2
  182. case 2: // Q4_0
  183. typeSize = 2 + blockSize/2
  184. case 3: // Q4_1
  185. typeSize = 2 + 2 + blockSize/2
  186. case 6: // Q5_0
  187. typeSize = 2 + 4 + blockSize/2
  188. case 7: // Q5_1
  189. typeSize = 2 + 2 + 4 + blockSize/2
  190. case 8: // Q8_0
  191. typeSize = 2 + blockSize
  192. case 9: // Q8_1
  193. typeSize = 4 + 4 + blockSize
  194. case 10: // Q2_K
  195. typeSize = blockSize/16 + blockSize/4 + 2 + 2
  196. case 11: // Q3_K
  197. typeSize = blockSize/8 + blockSize/4 + 12 + 2
  198. case 12: // Q4_K
  199. typeSize = 2 + 2 + 12 + blockSize/2
  200. case 13: // Q5_K
  201. typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
  202. case 14: // Q6_K
  203. typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
  204. }
  205. parameters := shape[0] * shape[1] * shape[2] * shape[3]
  206. size := parameters * typeSize / blockSize
  207. llm.tensors = append(llm.tensors, tensor{
  208. name: name,
  209. kind: kind,
  210. offset: offset,
  211. size: size,
  212. shape: shape,
  213. })
  214. llm.parameters += parameters
  215. }
  216. alignment, ok := llm.kv["general.alignment"].(uint32)
  217. if !ok {
  218. alignment = 32
  219. }
  220. rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
  221. for _, tensor := range llm.tensors {
  222. padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  223. rso.Seek(padded, io.SeekCurrent)
  224. }
  225. return nil
  226. }
  227. func (llm *ggufModel) NumLayers() uint32 {
  228. value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
  229. if !exists {
  230. return 0
  231. }
  232. return value.(uint32)
  233. }
  234. func (llm *ggufModel) NumHead() uint32 {
  235. value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
  236. if !exists {
  237. return 0
  238. }
  239. return value.(uint32)
  240. }
  241. func (llm *ggufModel) NumEmbed() uint32 {
  242. value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
  243. if !exists {
  244. return 0
  245. }
  246. return value.(uint32)
  247. }
  248. func (llm *ggufModel) NumHeadKv() uint32 {
  249. value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
  250. if !exists {
  251. return 0
  252. }
  253. return value.(uint32)
  254. }
  255. func (llm *ggufModel) NumGQA() uint32 {
  256. numHeadKv := llm.NumHeadKv()
  257. if numHeadKv == 0 {
  258. return 0
  259. }
  260. return llm.NumHead() / numHeadKv
  261. }
  262. func (llm ggufModel) readU8(r io.Reader) uint8 {
  263. var u8 uint8
  264. binary.Read(r, llm.bo, &u8)
  265. return u8
  266. }
  267. func (llm ggufModel) readI8(r io.Reader) int8 {
  268. var i8 int8
  269. binary.Read(r, llm.bo, &i8)
  270. return i8
  271. }
  272. func (llm ggufModel) readU16(r io.Reader) uint16 {
  273. var u16 uint16
  274. binary.Read(r, llm.bo, &u16)
  275. return u16
  276. }
  277. func (llm ggufModel) readI16(r io.Reader) int16 {
  278. var i16 int16
  279. binary.Read(r, llm.bo, &i16)
  280. return i16
  281. }
  282. func (llm ggufModel) readU32(r io.Reader) uint32 {
  283. var u32 uint32
  284. binary.Read(r, llm.bo, &u32)
  285. return u32
  286. }
  287. func (llm ggufModel) readI32(r io.Reader) int32 {
  288. var i32 int32
  289. binary.Read(r, llm.bo, &i32)
  290. return i32
  291. }
  292. func (llm ggufModel) readU64(r io.Reader) uint64 {
  293. var u64 uint64
  294. binary.Read(r, llm.bo, &u64)
  295. return u64
  296. }
  297. func (llm ggufModel) readI64(r io.Reader) int64 {
  298. var i64 int64
  299. binary.Read(r, llm.bo, &i64)
  300. return i64
  301. }
  302. func (llm ggufModel) readF32(r io.Reader) float32 {
  303. var f32 float32
  304. binary.Read(r, llm.bo, &f32)
  305. return f32
  306. }
  307. func (llm ggufModel) readF64(r io.Reader) float64 {
  308. var f64 float64
  309. binary.Read(r, llm.bo, &f64)
  310. return f64
  311. }
  312. func (llm ggufModel) readBool(r io.Reader) bool {
  313. var b bool
  314. binary.Read(r, llm.bo, &b)
  315. return b
  316. }
  317. func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
  318. var nameLength uint32
  319. binary.Read(r, llm.bo, &nameLength)
  320. var b bytes.Buffer
  321. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  322. return "", err
  323. }
  324. // gguf v1 strings are null-terminated
  325. b.Truncate(b.Len() - 1)
  326. return b.String(), nil
  327. }
  328. func (llm ggufModel) readString(r io.Reader) (string, error) {
  329. if llm.Version == 1 {
  330. return llm.readStringV1(r)
  331. }
  332. var nameLength uint64
  333. binary.Read(r, llm.bo, &nameLength)
  334. var b bytes.Buffer
  335. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  336. return "", err
  337. }
  338. return b.String(), nil
  339. }
  340. func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
  341. atype := llm.readU32(r)
  342. n := llm.readU32(r)
  343. for i := 0; uint32(i) < n; i++ {
  344. switch atype {
  345. case ggufTypeUint8:
  346. arr = append(arr, llm.readU8(r))
  347. case ggufTypeInt8:
  348. arr = append(arr, llm.readI8(r))
  349. case ggufTypeUint16:
  350. arr = append(arr, llm.readU16(r))
  351. case ggufTypeInt16:
  352. arr = append(arr, llm.readI16(r))
  353. case ggufTypeUint32:
  354. arr = append(arr, llm.readU32(r))
  355. case ggufTypeInt32:
  356. arr = append(arr, llm.readI32(r))
  357. case ggufTypeFloat32:
  358. arr = append(arr, llm.readF32(r))
  359. case ggufTypeBool:
  360. arr = append(arr, llm.readBool(r))
  361. case ggufTypeString:
  362. s, err := llm.readStringV1(r)
  363. if err != nil {
  364. return nil, err
  365. }
  366. arr = append(arr, s)
  367. default:
  368. return nil, fmt.Errorf("invalid array type: %d", atype)
  369. }
  370. }
  371. return
  372. }
  373. func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
  374. if llm.Version == 1 {
  375. return llm.readArrayV1(r)
  376. }
  377. atype := llm.readU32(r)
  378. n := llm.readU64(r)
  379. for i := 0; uint64(i) < n; i++ {
  380. switch atype {
  381. case ggufTypeUint8:
  382. arr = append(arr, llm.readU8(r))
  383. case ggufTypeInt8:
  384. arr = append(arr, llm.readI8(r))
  385. case ggufTypeUint16:
  386. arr = append(arr, llm.readU16(r))
  387. case ggufTypeInt16:
  388. arr = append(arr, llm.readI16(r))
  389. case ggufTypeUint32:
  390. arr = append(arr, llm.readU32(r))
  391. case ggufTypeInt32:
  392. arr = append(arr, llm.readI32(r))
  393. case ggufTypeUint64:
  394. arr = append(arr, llm.readU64(r))
  395. case ggufTypeInt64:
  396. arr = append(arr, llm.readI64(r))
  397. case ggufTypeFloat32:
  398. arr = append(arr, llm.readF32(r))
  399. case ggufTypeFloat64:
  400. arr = append(arr, llm.readF64(r))
  401. case ggufTypeBool:
  402. arr = append(arr, llm.readBool(r))
  403. case ggufTypeString:
  404. s, err := llm.readString(r)
  405. if err != nil {
  406. return nil, err
  407. }
  408. arr = append(arr, s)
  409. default:
  410. return nil, fmt.Errorf("invalid array type: %d", atype)
  411. }
  412. }
  413. return
  414. }