gguf.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. package llm
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "github.com/jmorganca/ollama/format"
  8. )
  9. type containerGGUF struct {
  10. bo binary.ByteOrder
  11. Version uint32
  12. V1 struct {
  13. NumTensor uint32
  14. NumKV uint32
  15. }
  16. V2 struct {
  17. NumTensor uint64
  18. NumKV uint64
  19. }
  20. }
  21. func (c *containerGGUF) Name() string {
  22. return "gguf"
  23. }
  24. func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
  25. binary.Read(rso, c.bo, &c.Version)
  26. switch c.Version {
  27. case 1:
  28. binary.Read(rso, c.bo, &c.V1)
  29. default:
  30. binary.Read(rso, c.bo, &c.V2)
  31. }
  32. model := newGGUFModel(c)
  33. if err := model.Decode(rso); err != nil {
  34. return nil, err
  35. }
  36. return model, nil
  37. }
  38. const (
  39. ggufTypeUint8 uint32 = iota
  40. ggufTypeInt8
  41. ggufTypeUint16
  42. ggufTypeInt16
  43. ggufTypeUint32
  44. ggufTypeInt32
  45. ggufTypeFloat32
  46. ggufTypeBool
  47. ggufTypeString
  48. ggufTypeArray
  49. ggufTypeUint64
  50. ggufTypeInt64
  51. ggufTypeFloat64
  52. )
  53. type kv map[string]any
  54. type tensor struct {
  55. name string
  56. kind uint32
  57. offset uint64
  58. size uint64
  59. // shape is the number of elements in each dimension
  60. shape [4]uint64
  61. }
  62. type ggufModel struct {
  63. *containerGGUF
  64. kv
  65. tensors []tensor
  66. parameters uint64
  67. }
  68. func newGGUFModel(container *containerGGUF) *ggufModel {
  69. return &ggufModel{
  70. containerGGUF: container,
  71. kv: make(kv),
  72. }
  73. }
  74. func (llm *ggufModel) NumTensor() uint64 {
  75. if llm.Version == 1 {
  76. return uint64(llm.V1.NumTensor)
  77. }
  78. return llm.V2.NumTensor
  79. }
  80. func (llm *ggufModel) NumKV() uint64 {
  81. if llm.Version == 1 {
  82. return uint64(llm.V1.NumKV)
  83. }
  84. return llm.V2.NumKV
  85. }
  86. func (llm *ggufModel) ModelFamily() string {
  87. if t, ok := llm.kv["general.architecture"].(string); ok {
  88. return t
  89. }
  90. return "unknown"
  91. }
  92. func (llm *ggufModel) ModelType() string {
  93. if llm.parameters > 0 {
  94. return format.HumanNumber(llm.parameters)
  95. }
  96. switch llm.ModelFamily() {
  97. case "llama":
  98. if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
  99. heads, headsOK := llm.kv["llama.head_count"].(uint32)
  100. headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
  101. if headsOK && headsKVsOK && heads/headKVs == 8 {
  102. return "70B"
  103. }
  104. return llamaModelType(blocks)
  105. }
  106. case "falcon":
  107. if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
  108. return falconModelType(blocks)
  109. }
  110. case "starcoder":
  111. if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
  112. return starCoderModelType(blocks)
  113. }
  114. }
  115. return "unknown"
  116. }
  117. func (llm *ggufModel) FileType() string {
  118. if t, ok := llm.kv["general.file_type"].(uint32); ok {
  119. return fileType(t)
  120. }
  121. return "unknown"
  122. }
  123. func (llm *ggufModel) Decode(rso *readSeekOffset) error {
  124. // decode key-values
  125. for i := 0; uint64(i) < llm.NumKV(); i++ {
  126. k, err := llm.readString(rso)
  127. if err != nil {
  128. return err
  129. }
  130. vtype := llm.readU32(rso)
  131. var v any
  132. switch vtype {
  133. case ggufTypeUint8:
  134. v = llm.readU8(rso)
  135. case ggufTypeInt8:
  136. v = llm.readI8(rso)
  137. case ggufTypeUint16:
  138. v = llm.readU16(rso)
  139. case ggufTypeInt16:
  140. v = llm.readI16(rso)
  141. case ggufTypeUint32:
  142. v = llm.readU32(rso)
  143. case ggufTypeInt32:
  144. v = llm.readI32(rso)
  145. case ggufTypeUint64:
  146. v = llm.readU64(rso)
  147. case ggufTypeInt64:
  148. v = llm.readI64(rso)
  149. case ggufTypeFloat32:
  150. v = llm.readF32(rso)
  151. case ggufTypeFloat64:
  152. v = llm.readF64(rso)
  153. case ggufTypeBool:
  154. v = llm.readBool(rso)
  155. case ggufTypeString:
  156. s, err := llm.readString(rso)
  157. if err != nil {
  158. return err
  159. }
  160. v = s
  161. case ggufTypeArray:
  162. a, err := llm.readArray(rso)
  163. if err != nil {
  164. return err
  165. }
  166. v = a
  167. default:
  168. return fmt.Errorf("invalid type: %d", vtype)
  169. }
  170. llm.kv[k] = v
  171. }
  172. // decode tensors
  173. for i := 0; uint64(i) < llm.NumTensor(); i++ {
  174. name, err := llm.readString(rso)
  175. if err != nil {
  176. return err
  177. }
  178. // dims is the number of dimensions in the tensor
  179. dims := llm.readU32(rso)
  180. shape := [4]uint64{1, 1, 1, 1}
  181. for i := 0; uint32(i) < dims; i++ {
  182. shape[i] = llm.readU64(rso)
  183. }
  184. kind := llm.readU32(rso)
  185. offset := llm.readU64(rso)
  186. var blockSize uint64
  187. switch {
  188. case kind < 2:
  189. blockSize = 1
  190. case kind < 10:
  191. blockSize = 32
  192. default:
  193. blockSize = 256
  194. }
  195. var typeSize uint64
  196. switch kind {
  197. case 0: // FP32
  198. typeSize = 4
  199. case 1: // FP16
  200. typeSize = 2
  201. case 2: // Q4_0
  202. typeSize = 2 + blockSize/2
  203. case 3: // Q4_1
  204. typeSize = 2 + 2 + blockSize/2
  205. case 6: // Q5_0
  206. typeSize = 2 + 4 + blockSize/2
  207. case 7: // Q5_1
  208. typeSize = 2 + 2 + 4 + blockSize/2
  209. case 8: // Q8_0
  210. typeSize = 2 + blockSize
  211. case 9: // Q8_1
  212. typeSize = 4 + 4 + blockSize
  213. case 10: // Q2_K
  214. typeSize = blockSize/16 + blockSize/4 + 2 + 2
  215. case 11: // Q3_K
  216. typeSize = blockSize/8 + blockSize/4 + 12 + 2
  217. case 12: // Q4_K
  218. typeSize = 2 + 2 + 12 + blockSize/2
  219. case 13: // Q5_K
  220. typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
  221. case 14: // Q6_K
  222. typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
  223. }
  224. parameters := shape[0] * shape[1] * shape[2] * shape[3]
  225. size := parameters * typeSize / blockSize
  226. llm.tensors = append(llm.tensors, tensor{
  227. name: name,
  228. kind: kind,
  229. offset: offset,
  230. size: size,
  231. shape: shape,
  232. })
  233. llm.parameters += parameters
  234. }
  235. alignment, ok := llm.kv["general.alignment"].(uint32)
  236. if !ok {
  237. alignment = 32
  238. }
  239. rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
  240. for _, tensor := range llm.tensors {
  241. padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
  242. rso.Seek(padded, io.SeekCurrent)
  243. }
  244. return nil
  245. }
  246. func (llm *ggufModel) NumLayers() int64 {
  247. value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
  248. if !exists {
  249. return 0
  250. }
  251. v := value.(uint32)
  252. return int64(v)
  253. }
  254. func (llm ggufModel) readU8(r io.Reader) uint8 {
  255. var u8 uint8
  256. binary.Read(r, llm.bo, &u8)
  257. return u8
  258. }
  259. func (llm ggufModel) readI8(r io.Reader) int8 {
  260. var i8 int8
  261. binary.Read(r, llm.bo, &i8)
  262. return i8
  263. }
  264. func (llm ggufModel) readU16(r io.Reader) uint16 {
  265. var u16 uint16
  266. binary.Read(r, llm.bo, &u16)
  267. return u16
  268. }
  269. func (llm ggufModel) readI16(r io.Reader) int16 {
  270. var i16 int16
  271. binary.Read(r, llm.bo, &i16)
  272. return i16
  273. }
  274. func (llm ggufModel) readU32(r io.Reader) uint32 {
  275. var u32 uint32
  276. binary.Read(r, llm.bo, &u32)
  277. return u32
  278. }
  279. func (llm ggufModel) readI32(r io.Reader) int32 {
  280. var i32 int32
  281. binary.Read(r, llm.bo, &i32)
  282. return i32
  283. }
  284. func (llm ggufModel) readU64(r io.Reader) uint64 {
  285. var u64 uint64
  286. binary.Read(r, llm.bo, &u64)
  287. return u64
  288. }
  289. func (llm ggufModel) readI64(r io.Reader) int64 {
  290. var i64 int64
  291. binary.Read(r, llm.bo, &i64)
  292. return i64
  293. }
  294. func (llm ggufModel) readF32(r io.Reader) float32 {
  295. var f32 float32
  296. binary.Read(r, llm.bo, &f32)
  297. return f32
  298. }
  299. func (llm ggufModel) readF64(r io.Reader) float64 {
  300. var f64 float64
  301. binary.Read(r, llm.bo, &f64)
  302. return f64
  303. }
  304. func (llm ggufModel) readBool(r io.Reader) bool {
  305. var b bool
  306. binary.Read(r, llm.bo, &b)
  307. return b
  308. }
  309. func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
  310. var nameLength uint32
  311. binary.Read(r, llm.bo, &nameLength)
  312. var b bytes.Buffer
  313. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  314. return "", err
  315. }
  316. // gguf v1 strings are null-terminated
  317. b.Truncate(b.Len() - 1)
  318. return b.String(), nil
  319. }
  320. func (llm ggufModel) readString(r io.Reader) (string, error) {
  321. if llm.Version == 1 {
  322. return llm.readStringV1(r)
  323. }
  324. var nameLength uint64
  325. binary.Read(r, llm.bo, &nameLength)
  326. var b bytes.Buffer
  327. if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
  328. return "", err
  329. }
  330. return b.String(), nil
  331. }
  332. func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
  333. atype := llm.readU32(r)
  334. n := llm.readU32(r)
  335. for i := 0; uint32(i) < n; i++ {
  336. switch atype {
  337. case ggufTypeUint8:
  338. arr = append(arr, llm.readU8(r))
  339. case ggufTypeInt8:
  340. arr = append(arr, llm.readI8(r))
  341. case ggufTypeUint16:
  342. arr = append(arr, llm.readU16(r))
  343. case ggufTypeInt16:
  344. arr = append(arr, llm.readI16(r))
  345. case ggufTypeUint32:
  346. arr = append(arr, llm.readU32(r))
  347. case ggufTypeInt32:
  348. arr = append(arr, llm.readI32(r))
  349. case ggufTypeFloat32:
  350. arr = append(arr, llm.readF32(r))
  351. case ggufTypeBool:
  352. arr = append(arr, llm.readBool(r))
  353. case ggufTypeString:
  354. s, err := llm.readStringV1(r)
  355. if err != nil {
  356. return nil, err
  357. }
  358. arr = append(arr, s)
  359. default:
  360. return nil, fmt.Errorf("invalid array type: %d", atype)
  361. }
  362. }
  363. return
  364. }
  365. func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
  366. if llm.Version == 1 {
  367. return llm.readArrayV1(r)
  368. }
  369. atype := llm.readU32(r)
  370. n := llm.readU64(r)
  371. for i := 0; uint64(i) < n; i++ {
  372. switch atype {
  373. case ggufTypeUint8:
  374. arr = append(arr, llm.readU8(r))
  375. case ggufTypeInt8:
  376. arr = append(arr, llm.readI8(r))
  377. case ggufTypeUint16:
  378. arr = append(arr, llm.readU16(r))
  379. case ggufTypeInt16:
  380. arr = append(arr, llm.readI16(r))
  381. case ggufTypeUint32:
  382. arr = append(arr, llm.readU32(r))
  383. case ggufTypeInt32:
  384. arr = append(arr, llm.readI32(r))
  385. case ggufTypeUint64:
  386. arr = append(arr, llm.readU64(r))
  387. case ggufTypeInt64:
  388. arr = append(arr, llm.readI64(r))
  389. case ggufTypeFloat32:
  390. arr = append(arr, llm.readF32(r))
  391. case ggufTypeFloat64:
  392. arr = append(arr, llm.readF64(r))
  393. case ggufTypeBool:
  394. arr = append(arr, llm.readBool(r))
  395. case ggufTypeString:
  396. s, err := llm.readString(r)
  397. if err != nil {
  398. return nil, err
  399. }
  400. arr = append(arr, s)
  401. default:
  402. return nil, fmt.Errorf("invalid array type: %d", atype)
  403. }
  404. }
  405. return
  406. }