package llm

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"

	"github.com/jmorganca/ollama/format"
)

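// containerGGUF holds the GGUF file header: the format version and, depending
// on that version, the tensor and key-value counts (32-bit fields in version 1,
// 64-bit fields otherwise). bo is the byte order used to decode the rest of
// the file.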
type containerGGUF struct {
	bo binary.ByteOrder

	Version uint32

	V1 struct {
		NumTensor uint32
		NumKV     uint32
	}

	V2 struct {
		NumTensor uint64
		NumKV     uint64
	}
}

func (c *containerGGUF) Name() string {
	return "gguf"
}

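// Decode reads the version-dependent header counts and then hands the rest of
// the stream to a ggufModel. Errors from binary.Read on the header fields are
// not checked here.
//
// A minimal, hypothetical usage sketch (assuming a readSeekOffset wrapper over
// the model file, defined elsewhere in this package, and a little-endian file):
//
//	c := &containerGGUF{bo: binary.LittleEndian}
//	model, err := c.Decode(rso)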
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
	binary.Read(rso, c.bo, &c.Version)

	switch c.Version {
	case 1:
		binary.Read(rso, c.bo, &c.V1)
	default:
		binary.Read(rso, c.bo, &c.V2)
	}

	model := newGGUFModel(c)
	if err := model.Decode(rso); err != nil {
		return nil, err
	}

	return model, nil
}

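// GGUF metadata value type identifiers, in the order they are encoded on disk
// (0 = uint8 ... 12 = float64). The numeric values are read directly from the
// file, so the ordering here must match the GGUF format.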
const (
	ggufTypeUint8 uint32 = iota
	ggufTypeInt8
	ggufTypeUint16
	ggufTypeInt16
	ggufTypeUint32
	ggufTypeInt32
	ggufTypeFloat32
	ggufTypeBool
	ggufTypeString
	ggufTypeArray
	ggufTypeUint64
	ggufTypeInt64
	ggufTypeFloat64
)

type kv map[string]any

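// tensor describes a single tensor as recorded in the GGUF header: its name,
// ggml data type (kind), byte offset into the tensor data section, and up to
// four dimensions. Unused dimensions stay at 1 so parameters() can multiply
// all four unconditionally.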
type tensor struct {
	name   string
	kind   uint32
	offset uint64

	// shape is the number of elements in each dimension
	shape [4]uint64
}

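// blockSize returns the number of elements packed into one quantization
// block: 1 for the unquantized float types, 32 for the older quantization
// formats (Q4_0 through Q8_1), and 256 for the K-quants.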
func (t tensor) blockSize() uint64 {
	switch {
	case t.kind < 2:
		return 1
	case t.kind < 10:
		return 32
	default:
		return 256
	}
}

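// typeSize returns the size in bytes of one block of the tensor's data type:
// the per-block header (scales and offsets) plus the packed weights. Unknown
// kinds return 0, which makes size() evaluate to 0 rather than panicking.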
func (t tensor) typeSize() uint64 {
	blockSize := t.blockSize()

	switch t.kind {
	case 0: // FP32
		return 4
	case 1: // FP16
		return 2
	case 2: // Q4_0
		return 2 + blockSize/2
	case 3: // Q4_1
		return 2 + 2 + blockSize/2
	case 6: // Q5_0
		return 2 + 4 + blockSize/2
	case 7: // Q5_1
		return 2 + 2 + 4 + blockSize/2
	case 8: // Q8_0
		return 2 + blockSize
	case 9: // Q8_1
		return 4 + 4 + blockSize
	case 10: // Q2_K
		return blockSize/16 + blockSize/4 + 2 + 2
	case 11: // Q3_K
		return blockSize/8 + blockSize/4 + 12 + 2
	case 12: // Q4_K
		return 2 + 2 + 12 + blockSize/2
	case 13: // Q5_K
		return 2 + 2 + 12 + blockSize/8 + blockSize/2
	case 14: // Q6_K
		return blockSize/2 + blockSize/4 + blockSize/16 + 2
	default:
		return 0
	}
}

func (t tensor) parameters() uint64 {
	return t.shape[0] * t.shape[1] * t.shape[2] * t.shape[3]
}

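// size returns the tensor's payload size in bytes: the element count scaled
// by bytes-per-block over elements-per-block. For example (illustrative
// numbers), a Q4_0 tensor of 4096x4096 elements has blockSize 32 and
// typeSize 18, so size() = 4096*4096*18/32 = 9437184 bytes.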
func (t tensor) size() uint64 {
	return t.parameters() * t.typeSize() / t.blockSize()
}

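// ggufModel combines the container header with everything decoded from the
// file body: the key-value metadata map, the tensor descriptors, and a
// running total of parameters used to report the model size.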
type ggufModel struct {
	*containerGGUF
	kv
	tensors []tensor

	parameters uint64
}

func newGGUFModel(container *containerGGUF) *ggufModel {
	return &ggufModel{
		containerGGUF: container,
		kv:            make(kv),
	}
}

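// NumTensor and NumKV normalize the version-dependent header counts to
// uint64: version 1 stores them as uint32, later versions as uint64.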
func (llm *ggufModel) NumTensor() uint64 {
	if llm.Version == 1 {
		return uint64(llm.V1.NumTensor)
	}

	return llm.V2.NumTensor
}

func (llm *ggufModel) NumKV() uint64 {
	if llm.Version == 1 {
		return uint64(llm.V1.NumKV)
	}

	return llm.V2.NumKV
}

func (llm *ggufModel) ModelFamily() string {
	if t, ok := llm.kv["general.architecture"].(string); ok {
		return t
	}

	return "unknown"
}

func (llm *ggufModel) ModelType() string {
	if llm.parameters > 0 {
		return format.HumanNumber(llm.parameters)
	}

	return "unknown"
}

func (llm *ggufModel) FileType() string {
	if t, ok := llm.kv["general.file_type"].(uint32); ok {
		return fileType(t)
	}

	return "unknown"
}

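// Decode reads the GGUF body in three passes: the key-value metadata section,
// then the tensor descriptors (name, dimensions, kind, offset), and finally a
// series of seeks that skip the alignment padding and each tensor's data
// block, so the weights themselves are never read into memory here.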
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
	// decode key-values
	for i := 0; uint64(i) < llm.NumKV(); i++ {
		k, err := llm.readString(rso)
		if err != nil {
			return err
		}

		vtype := llm.readU32(rso)

		var v any
		switch vtype {
		case ggufTypeUint8:
			v = llm.readU8(rso)
		case ggufTypeInt8:
			v = llm.readI8(rso)
		case ggufTypeUint16:
			v = llm.readU16(rso)
		case ggufTypeInt16:
			v = llm.readI16(rso)
		case ggufTypeUint32:
			v = llm.readU32(rso)
		case ggufTypeInt32:
			v = llm.readI32(rso)
		case ggufTypeUint64:
			v = llm.readU64(rso)
		case ggufTypeInt64:
			v = llm.readI64(rso)
		case ggufTypeFloat32:
			v = llm.readF32(rso)
		case ggufTypeFloat64:
			v = llm.readF64(rso)
		case ggufTypeBool:
			v = llm.readBool(rso)
		case ggufTypeString:
			s, err := llm.readString(rso)
			if err != nil {
				return err
			}

			v = s
		case ggufTypeArray:
			a, err := llm.readArray(rso)
			if err != nil {
				return err
			}

			v = a
		default:
			return fmt.Errorf("invalid type: %d", vtype)
		}

		llm.kv[k] = v
	}

	// decode tensors
	for i := 0; uint64(i) < llm.NumTensor(); i++ {
		name, err := llm.readString(rso)
		if err != nil {
			return err
		}

		// dims is the number of dimensions in the tensor
		dims := llm.readU32(rso)

		shape := [4]uint64{1, 1, 1, 1}
		for i := 0; uint32(i) < dims; i++ {
			shape[i] = llm.readU64(rso)
		}

		tensor := tensor{
			name:   name,
			kind:   llm.readU32(rso),
			offset: llm.readU64(rso),
			shape:  shape,
		}

		llm.tensors = append(llm.tensors, tensor)
		llm.parameters += tensor.parameters()
	}

	alignment, ok := llm.kv["general.alignment"].(uint32)
	if !ok {
		alignment = 32
	}

	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
	for _, tensor := range llm.tensors {
		padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
		rso.Seek(padded, io.SeekCurrent)
	}

	return nil
}

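// The accessors below look up architecture-scoped metadata keys, e.g.
// "llama.block_count" or "llama.attention.head_count" for a model whose
// general.architecture is "llama". Missing keys return 0; present keys are
// asserted to be uint32 and will panic on a type mismatch.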
func (llm *ggufModel) NumLayers() uint32 {
	value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
	if !exists {
		return 0
	}

	return value.(uint32)
}

func (llm *ggufModel) NumHead() uint32 {
	value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
	if !exists {
		return 0
	}

	return value.(uint32)
}

func (llm *ggufModel) NumEmbed() uint32 {
	value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
	if !exists {
		return 0
	}

	return value.(uint32)
}

func (llm *ggufModel) NumHeadKv() uint32 {
	value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
	if !exists {
		return 0
	}

	return value.(uint32)
}

func (llm *ggufModel) NumCtx() uint32 {
	value, exists := llm.kv[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
	if !exists {
		return 0
	}

	return value.(uint32)
}

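// NumGQA reports the grouped-query attention factor: the number of query
// heads per key/value head. It is 0 when head_count_kv is absent, and 1 for
// models using ordinary multi-head attention (head_count == head_count_kv).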
func (llm *ggufModel) NumGQA() uint32 {
	numHeadKv := llm.NumHeadKv()
	if numHeadKv == 0 {
		return 0
	}

	return llm.NumHead() / numHeadKv
}

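// The fixed-width readers below decode a single value using the container's
// byte order. binary.Read errors are ignored, so a failed read yields the
// type's zero value.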
func (llm ggufModel) readU8(r io.Reader) uint8 {
	var u8 uint8
	binary.Read(r, llm.bo, &u8)
	return u8
}

func (llm ggufModel) readI8(r io.Reader) int8 {
	var i8 int8
	binary.Read(r, llm.bo, &i8)
	return i8
}

func (llm ggufModel) readU16(r io.Reader) uint16 {
	var u16 uint16
	binary.Read(r, llm.bo, &u16)
	return u16
}

func (llm ggufModel) readI16(r io.Reader) int16 {
	var i16 int16
	binary.Read(r, llm.bo, &i16)
	return i16
}

func (llm ggufModel) readU32(r io.Reader) uint32 {
	var u32 uint32
	binary.Read(r, llm.bo, &u32)
	return u32
}

func (llm ggufModel) readI32(r io.Reader) int32 {
	var i32 int32
	binary.Read(r, llm.bo, &i32)
	return i32
}

func (llm ggufModel) readU64(r io.Reader) uint64 {
	var u64 uint64
	binary.Read(r, llm.bo, &u64)
	return u64
}

func (llm ggufModel) readI64(r io.Reader) int64 {
	var i64 int64
	binary.Read(r, llm.bo, &i64)
	return i64
}

func (llm ggufModel) readF32(r io.Reader) float32 {
	var f32 float32
	binary.Read(r, llm.bo, &f32)
	return f32
}

func (llm ggufModel) readF64(r io.Reader) float64 {
	var f64 float64
	binary.Read(r, llm.bo, &f64)
	return f64
}

func (llm ggufModel) readBool(r io.Reader) bool {
	var b bool
	binary.Read(r, llm.bo, &b)
	return b
}

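// readStringV1 reads a GGUF v1 string: a uint32 length prefix followed by the
// bytes, with a trailing NUL that is stripped before returning.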
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
	var nameLength uint32
	binary.Read(r, llm.bo, &nameLength)

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	// gguf v1 strings are null-terminated
	b.Truncate(b.Len() - 1)

	return b.String(), nil
}

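// readString reads a string for the container's version: v1 strings go
// through readStringV1, while v2 and later use a uint64 length prefix and no
// terminator.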
func (llm ggufModel) readString(r io.Reader) (string, error) {
	if llm.Version == 1 {
		return llm.readStringV1(r)
	}

	var nameLength uint64
	binary.Read(r, llm.bo, &nameLength)

	var b bytes.Buffer
	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
		return "", err
	}

	return b.String(), nil
}

func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
	atype := llm.readU32(r)
	n := llm.readU32(r)

	for i := 0; uint32(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
			arr = append(arr, llm.readI8(r))
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readStringV1(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}

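// readArray reads a typed array for v2 and later files (a uint32 element type
// followed by a uint64 count), deferring to readArrayV1 for version 1, which
// uses a uint32 count and the v1 string encoding. Nested arrays are not
// handled and fall through to the invalid-type error.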
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
	if llm.Version == 1 {
		return llm.readArrayV1(r)
	}

	atype := llm.readU32(r)
	n := llm.readU64(r)

	for i := 0; uint64(i) < n; i++ {
		switch atype {
		case ggufTypeUint8:
			arr = append(arr, llm.readU8(r))
		case ggufTypeInt8:
			arr = append(arr, llm.readI8(r))
		case ggufTypeUint16:
			arr = append(arr, llm.readU16(r))
		case ggufTypeInt16:
			arr = append(arr, llm.readI16(r))
		case ggufTypeUint32:
			arr = append(arr, llm.readU32(r))
		case ggufTypeInt32:
			arr = append(arr, llm.readI32(r))
		case ggufTypeUint64:
			arr = append(arr, llm.readU64(r))
		case ggufTypeInt64:
			arr = append(arr, llm.readI64(r))
		case ggufTypeFloat32:
			arr = append(arr, llm.readF32(r))
		case ggufTypeFloat64:
			arr = append(arr, llm.readF64(r))
		case ggufTypeBool:
			arr = append(arr, llm.readBool(r))
		case ggufTypeString:
			s, err := llm.readString(r)
			if err != nil {
				return nil, err
			}

			arr = append(arr, s)
		default:
			return nil, fmt.Errorf("invalid array type: %d", atype)
		}
	}

	return
}