@@ -23,26 +23,24 @@ type containerGGUF struct {
 		NumTensor uint64
 		NumKV     uint64
 	}
-
-	parameters uint64
 }
 
 func (c *containerGGUF) Name() string {
 	return "gguf"
 }
 
-func (c *containerGGUF) Decode(r io.Reader) (model, error) {
-	binary.Read(r, c.bo, &c.Version)
+func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
+	binary.Read(rso, c.bo, &c.Version)
 
 	switch c.Version {
 	case 1:
-		binary.Read(r, c.bo, &c.V1)
+		binary.Read(rso, c.bo, &c.V1)
 	default:
-		binary.Read(r, c.bo, &c.V2)
+		binary.Read(rso, c.bo, &c.V2)
 	}
 
 	model := newGGUFModel(c)
-	if err := model.Decode(r); err != nil {
+	if err := model.Decode(rso); err != nil {
 		return nil, err
 	}
 
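Note: both Decode methods now take a *readSeekOffset rather than a plain io.Reader, so the parser can track its absolute position in the file and later seek past tensor data. The wrapper itself is defined outside this hunk; a minimal sketch of such a type, assuming it simply embeds an io.ReadSeeker and keeps a running byte offset (standard io import assumed), could look like:

// readSeekOffset is illustrative only: it records the current byte offset
// alongside an embedded io.ReadSeeker so alignment padding can be computed.
type readSeekOffset struct {
	io.ReadSeeker
	offset int64
}

func (rso *readSeekOffset) Read(p []byte) (int, error) {
	n, err := rso.ReadSeeker.Read(p)
	rso.offset += int64(n)
	return n, err
}

func (rso *readSeekOffset) Seek(offset int64, whence int) (int64, error) {
	n, err := rso.ReadSeeker.Seek(offset, whence)
	if err != nil {
		return 0, err
	}
	rso.offset = n
	return n, nil
}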
@@ -67,9 +65,23 @@ const (
 
 type kv map[string]any
 
+type tensor struct {
+	name   string
+	kind   uint32
+	offset uint64
+	size   uint64
+
+	// shape is the number of elements in each dimension
+	shape [4]uint64
+}
+
 type ggufModel struct {
 	*containerGGUF
+
 	kv
+	tensors []tensor
+
+	parameters uint64
 }
 
 func newGGUFModel(container *containerGGUF) *ggufModel {
@@ -96,8 +108,7 @@ func (llm *ggufModel) NumKV() uint64 {
 }
 
 func (llm *ggufModel) ModelFamily() string {
-	t, ok := llm.kv["general.architecture"].(string)
-	if ok {
+	if t, ok := llm.kv["general.architecture"].(string); ok {
 		return t
 	}
 
@@ -134,57 +145,56 @@ func (llm *ggufModel) ModelType() string {
 }
 
 func (llm *ggufModel) FileType() string {
-	t, ok := llm.kv["general.file_type"].(uint32)
-	if ok {
+	if t, ok := llm.kv["general.file_type"].(uint32); ok {
 		return fileType(t)
 	}
 
 	return "unknown"
 }
 
-func (llm *ggufModel) Decode(r io.Reader) error {
+func (llm *ggufModel) Decode(rso *readSeekOffset) error {
 	// decode key-values
 	for i := 0; uint64(i) < llm.NumKV(); i++ {
-		k, err := llm.readString(r)
+		k, err := llm.readString(rso)
 		if err != nil {
 			return err
 		}
 
-		vtype := llm.readU32(r)
+		vtype := llm.readU32(rso)
 
 		var v any
 		switch vtype {
 		case ggufTypeUint8:
-			v = llm.readU8(r)
+			v = llm.readU8(rso)
 		case ggufTypeInt8:
-			v = llm.readI8(r)
+			v = llm.readI8(rso)
 		case ggufTypeUint16:
-			v = llm.readU16(r)
+			v = llm.readU16(rso)
 		case ggufTypeInt16:
-			v = llm.readI16(r)
+			v = llm.readI16(rso)
 		case ggufTypeUint32:
-			v = llm.readU32(r)
+			v = llm.readU32(rso)
 		case ggufTypeInt32:
-			v = llm.readI32(r)
+			v = llm.readI32(rso)
 		case ggufTypeUint64:
-			v = llm.readU64(r)
+			v = llm.readU64(rso)
 		case ggufTypeInt64:
-			v = llm.readI64(r)
+			v = llm.readI64(rso)
 		case ggufTypeFloat32:
-			v = llm.readF32(r)
+			v = llm.readF32(rso)
 		case ggufTypeFloat64:
-			v = llm.readF64(r)
+			v = llm.readF64(rso)
 		case ggufTypeBool:
-			v = llm.readBool(r)
+			v = llm.readBool(rso)
 		case ggufTypeString:
-			s, err := llm.readString(r)
+			s, err := llm.readString(rso)
 			if err != nil {
 				return err
 			}
 
 			v = s
 		case ggufTypeArray:
-			a, err := llm.readArray(r)
+			a, err := llm.readArray(rso)
 			if err != nil {
 				return err
 			}
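The key/value loop above reads a string key, a uint32 type tag, and then a value whose width depends on that tag; the read helpers live elsewhere in this file. As a hedged sketch of the string layout only, assuming a GGUF v2 file where a string is serialized as a uint64 byte length followed by the raw bytes (the helper name and receiver mirror the calls above but are illustrative, not the file's actual implementation):

func (llm *ggufModel) readString(rso *readSeekOffset) (string, error) {
	// Read the uint64 length prefix, then exactly that many bytes.
	var length uint64
	if err := binary.Read(rso, llm.bo, &length); err != nil {
		return "", err
	}

	buf := make([]byte, length)
	if _, err := io.ReadFull(rso, buf); err != nil {
		return "", err
	}

	return string(buf), nil
}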
@@ -199,21 +209,85 @@ func (llm *ggufModel) Decode(r io.Reader) error {
 
 	// decode tensors
 	for i := 0; uint64(i) < llm.NumTensor(); i++ {
-		if _, err := llm.readString(r); err != nil {
+		name, err := llm.readString(rso)
+		if err != nil {
 			return err
 		}
 
-		dimensions := llm.readU32(r)
+		// dims is the number of dimensions in the tensor
+		dims := llm.readU32(rso)
 
-		var elements uint64 = 1
-		for i := 0; uint32(i) < dimensions; i++ {
-			elements *= llm.readU64(r)
+		shape := [4]uint64{1, 1, 1, 1}
+		for i := 0; uint32(i) < dims; i++ {
+			shape[i] = llm.readU64(rso)
 		}
 
-		llm.readU32(r) // type
-		llm.readU64(r) // offset
+		kind := llm.readU32(rso)
+		offset := llm.readU64(rso)
+
+		var blockSize uint64
+		switch {
+		case kind < 2:
+			blockSize = 1
+		case kind < 10:
+			blockSize = 32
+		default:
+			blockSize = 256
+		}
+
+		var typeSize uint64
+		switch kind {
+		case 0: // FP32
+			typeSize = 4
+		case 1: // FP16
+			typeSize = 2
+		case 2: // Q4_0
+			typeSize = 2 + blockSize/2
+		case 3: // Q4_1
+			typeSize = 2 + 2 + blockSize/2
+		case 6: // Q5_0
+			typeSize = 2 + 4 + blockSize/2
+		case 7: // Q5_1
+			typeSize = 2 + 2 + 4 + blockSize/2
+		case 8: // Q8_0
+			typeSize = 2 + blockSize
+		case 9: // Q8_1
+			typeSize = 4 + 4 + blockSize
+		case 10: // Q2_K
+			typeSize = blockSize/16 + blockSize/4 + 2 + 2
+		case 11: // Q3_K
+			typeSize = blockSize/8 + blockSize/4 + 12 + 2
+		case 12: // Q4_K
+			typeSize = 2 + 2 + 12 + blockSize/2
+		case 13: // Q5_K
+			typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
+		case 14: // Q6_K
+			typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
+		}
+
+		parameters := shape[0] * shape[1] * shape[2] * shape[3]
+		size := parameters * typeSize / blockSize
+
+		llm.tensors = append(llm.tensors, tensor{
+			name:   name,
+			kind:   kind,
+			offset: offset,
+			size:   size,
+			shape:  shape,
+		})
+
+		llm.parameters += parameters
+	}
+
+	alignment, ok := llm.kv["general.alignment"].(uint32)
+	if !ok {
+		alignment = 32
+	}
 
-		llm.parameters += elements
+	rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
+	for _, tensor := range llm.tensors {
+		padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
+		rso.Seek(padded, io.SeekCurrent)
 	}
 
 	return nil
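The per-tensor arithmetic above is worth spelling out: each quantization kind groups blockSize elements into a block of typeSize bytes, so a tensor occupies elements * typeSize / blockSize bytes. For a Q4_0 (kind 2) tensor of shape 4096x4096, blockSize is 32 and typeSize is 2 + 32/2 = 18, giving 4096*4096/32 blocks of 18 bytes each, i.e. 9437184 bytes before alignment padding. A small standalone sketch of the same calculation plus the rounding rule used by the Seek loop (helper names are illustrative, not part of the change):

// tensorSize mirrors the block/type-size arithmetic from Decode for a few
// representative kinds; the remaining kinds are omitted here for brevity.
func tensorSize(kind uint32, shape [4]uint64) uint64 {
	var blockSize uint64
	switch {
	case kind < 2:
		blockSize = 1 // FP32/FP16 are unblocked
	case kind < 10:
		blockSize = 32 // classic Q4/Q5/Q8 formats
	default:
		blockSize = 256 // K-quants
	}

	var typeSize uint64
	switch kind {
	case 0: // FP32
		typeSize = 4
	case 1: // FP16
		typeSize = 2
	case 2: // Q4_0: 2-byte scale plus blockSize half-byte weights
		typeSize = 2 + blockSize/2
	case 8: // Q8_0: 2-byte scale plus blockSize one-byte weights
		typeSize = 2 + blockSize
	}

	elements := shape[0] * shape[1] * shape[2] * shape[3]
	return elements * typeSize / blockSize
}

// padded rounds size up to the next multiple of alignment (a power of two),
// matching the bit trick in the Seek loop above.
func padded(size, alignment int64) int64 {
	return (size + alignment - 1) &^ (alignment - 1)
}

For example, tensorSize(2, [4]uint64{4096, 4096, 1, 1}) returns 9437184, and padded(9437184, 32) leaves it unchanged because that size is already a multiple of 32.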