- package llm
- import (
- "bytes"
- "encoding/binary"
- "errors"
- "fmt"
- "io"
- "log/slog"
- "os"
- "regexp"
- "github.com/d4l3k/go-bfloat16"
- "github.com/pdevine/tensor"
- "github.com/pdevine/tensor/native"
- "github.com/x448/float16"
- "github.com/ollama/ollama/format"
- )
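- // ContainerGGUF holds the GGUF header that follows the 4-byte "GGUF"
- // magic (which the caller is expected to have consumed already): a
- // uint32 version, then version-specific tensor and KV counts.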
- type ContainerGGUF struct {
- ByteOrder binary.ByteOrder
- Version uint32
- V1 struct {
- NumTensor uint32
- NumKV uint32
- }
- V2 struct {
- NumTensor uint64
- NumKV uint64
- }
- V3 struct {
- NumTensor uint64
- NumKV uint64
- }
- }
- func (c *ContainerGGUF) Name() string {
- return "gguf"
- }
- func (c *ContainerGGUF) Decode(rs io.ReadSeeker) (model, error) {
- if err := binary.Read(rs, c.ByteOrder, &c.Version); err != nil {
- return nil, err
- }
- var err error
- switch c.Version {
- case 1:
- err = binary.Read(rs, c.ByteOrder, &c.V1)
- default:
- // v2 and v3 share the same header layout
- err = binary.Read(rs, c.ByteOrder, &c.V2)
- }
- if err != nil {
- return nil, err
- }
- model := NewGGUFModel(c)
- if err := model.Decode(rs); err != nil {
- return nil, err
- }
- return model, nil
- }
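- // token type constants; the zero value is skipped (undefined)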
- const (
- _ uint32 = iota
- GGUFTokenNormal
- GGUFTokenUnknown
- GGUFTokenControl
- GGUFTokenUserDefined
- GGUFTokenUnused
- GGUFTokenByte
- )
- const (
- GGUFTypeUint8 uint32 = iota
- GGUFTypeInt8
- GGUFTypeUint16
- GGUFTypeInt16
- GGUFTypeUint32
- GGUFTypeInt32
- GGUFTypeFloat32
- GGUFTypeBool
- GGUFTypeString
- GGUFTypeArray
- GGUFTypeUint64
- GGUFTypeInt64
- GGUFTypeFloat64
- )
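- // KV holds the model's GGUF metadata as decoded (or to-be-encoded) key-value pairs.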
- type KV map[string]any
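- // Tensor describes one tensor entry: its name, ggml type (Kind), offset into the tensor data section, and shape.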
- type Tensor struct {
- Name string
- Kind uint32
- Offset uint64
- // shape is the number of elements in each dimension
- Shape []uint64
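- // location of the raw tensor data in the source conversion file; used by Encode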
- FileName string
- OffsetPadding uint64
- FileOffsets []uint64
- }
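- // BlockSize returns the number of elements per quantization block for
- // the tensor's type: 1 for f32/f16, 32 for the classic Q4..Q8 types,
- // and 256 for the K-quant and IQ types.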
- func (t Tensor) BlockSize() uint64 {
- switch {
- case t.Kind < 2:
- return 1
- case t.Kind < 10:
- return 32
- default:
- return 256
- }
- }
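- // TypeSize returns the size in bytes of one block of the tensor's type;
- // the per-type layouts mirror the corresponding block structs in ggml.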
- func (t Tensor) TypeSize() uint64 {
- blockSize := t.BlockSize()
- switch t.Kind {
- case 0: // FP32
- return 4
- case 1: // FP16
- return 2
- case 2: // Q4_0
- return 2 + blockSize/2
- case 3: // Q4_1
- return 2 + 2 + blockSize/2
- case 6: // Q5_0
- return 2 + 4 + blockSize/2
- case 7: // Q5_1
- return 2 + 2 + 4 + blockSize/2
- case 8: // Q8_0
- return 2 + blockSize
- case 9: // Q8_1
- return 4 + 4 + blockSize
- case 10: // Q2_K
- return blockSize/16 + blockSize/4 + 2 + 2
- case 11: // Q3_K
- return blockSize/8 + blockSize/4 + 12 + 2
- case 12: // Q4_K
- return 2 + 2 + 12 + blockSize/2
- case 13: // Q5_K
- return 2 + 2 + 12 + blockSize/8 + blockSize/2
- case 14: // Q6_K
- return blockSize/2 + blockSize/4 + blockSize/16 + 2
- case 15: // Q8_K
- return 2 + blockSize + 2*blockSize/16
- case 16: // IQ2_XXS
- return 2 + 2*blockSize/8
- case 17: // IQ2_XS
- return 2 + 2*blockSize/8 + blockSize/32
- case 18: // IQ3_XXS
- return 2 + 3*blockSize/8
- default:
- return 0
- }
- }
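- // Parameters returns the total number of elements in the tensor.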
- func (t Tensor) Parameters() uint64 {
- var count uint64 = 1
- for _, n := range t.Shape {
- count *= n
- }
- return count
- }
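- // Size returns the tensor's size in bytes on disk:
- // Parameters() / BlockSize() blocks, each TypeSize() bytes.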
- func (t Tensor) Size() uint64 {
- return t.Parameters() * t.TypeSize() / t.BlockSize()
- }
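- // Repack permutes a 2-D attention q/k weight tensor by reshaping to
- // (heads, 2, rows/heads/2, cols), swapping axes 1 and 2, and flattening
- // back; this appears to undo the head-interleaving layout used by the
- // upstream (HF-style) checkpoints so the data matches what GGUF expects.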
- func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
- n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
- origShape := n.Shape().Clone()
- // reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
- if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
- return []uint16{}, err
- }
- if err := n.T(0, 2, 1, 3); err != nil {
- return []uint16{}, err
- }
- if err := n.Reshape(origShape...); err != nil {
- return []uint16{}, err
- }
- if err := n.Transpose(); err != nil {
- return []uint16{}, err
- }
- newN, err := native.SelectU16(n, 1)
- if err != nil {
- return []uint16{}, err
- }
- var fullTensor []uint16
- for _, v := range newN {
- fullTensor = append(fullTensor, v...)
- }
- return fullTensor, nil
- }
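- // GGUFModel couples the container header with the decoded metadata and
- // tensor descriptors.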
- type GGUFModel struct {
- *ContainerGGUF
- KV
- Tensors []Tensor
- parameters uint64
- }
- func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
- return &GGUFModel{
- ContainerGGUF: container,
- KV: make(KV),
- }
- }
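- // NumTensor returns the tensor count from the version-specific header;
- // versions 2 and 3 are both decoded into the V2 fields.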
- func (llm *GGUFModel) NumTensor() uint64 {
- if llm.Version == 1 {
- return uint64(llm.V1.NumTensor)
- }
- return llm.V2.NumTensor
- }
- func (llm *GGUFModel) NumKV() uint64 {
- if llm.Version == 1 {
- return uint64(llm.V1.NumKV)
- }
- return llm.V2.NumKV
- }
- func (llm *GGUFModel) ModelFamily() string {
- if t, ok := llm.KV["general.architecture"].(string); ok {
- return t
- }
- return "unknown"
- }
- func (llm *GGUFModel) ModelType() string {
- if llm.parameters > 0 {
- return format.HumanNumber(llm.parameters)
- }
- return "unknown"
- }
- func (llm *GGUFModel) FileType() string {
- if t, ok := llm.KV["general.file_type"].(uint32); ok {
- return fileType(t)
- }
- return "unknown"
- }
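- // Encode writes the model to f as GGUF v3: magic and version, the KV
- // metadata in the fixed kOrder below, the tensor descriptors, and then
- // the tensor data read back from the source files, converting bfloat16
- // to f32 or f16 and repacking attention q/k weights along the way.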
- func (llm *GGUFModel) Encode(f *os.File) error {
- // this mimics the order of the llama.cpp convert script
- kOrder := []string{
- "general.architecture",
- "general.name",
- "llama.context_length",
- "llama.embedding_length",
- "llama.block_count",
- "llama.feed_forward_length",
- "llama.rope.dimension_count",
- "llama.attention.head_count",
- "llama.attention.head_count_kv",
- "llama.attention.layer_norm_rms_epsilon",
- "llama.rope.freq_base",
- "general.file_type",
- "tokenizer.ggml.model",
- "tokenizer.ggml.tokens",
- "tokenizer.ggml.scores",
- "tokenizer.ggml.token_type",
- "tokenizer.ggml.bos_token_id",
- "tokenizer.ggml.eos_token_id",
- "tokenizer.ggml.unknown_token_id",
- "tokenizer.ggml.add_bos_token",
- "tokenizer.ggml.add_eos_token",
- "tokenizer.chat_template",
- }
- if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
- return err
- }
- for _, k := range kOrder {
- val, ok := llm.KV[k]
- if !ok {
- continue
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
- return err
- }
- switch v := val.(type) {
- case uint32:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
- return err
- }
- if err := llm.writeUint32(f, v); err != nil {
- return err
- }
- case float32:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
- return err
- }
- if err := llm.writeF32(f, v); err != nil {
- return err
- }
- case bool:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
- return err
- }
- if err := llm.writeBool(f, v); err != nil {
- return err
- }
- case string:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
- return err
- }
- if err := llm.writeString(f, v); err != nil {
- return err
- }
- case []int32:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
- return err
- }
- for _, i := range v {
- if err := llm.writeInt32(f, i); err != nil {
- return err
- }
- }
- case []uint32:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
- return err
- }
- for _, i := range v {
- if err := llm.writeUint32(f, i); err != nil {
- return err
- }
- }
- case []float32:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
- return err
- }
- for _, fl := range v {
- if err := llm.writeF32(f, fl); err != nil {
- return err
- }
- }
- case []string:
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
- return err
- }
- for _, s := range v {
- if err := llm.writeString(f, s); err != nil {
- return err
- }
- }
- }
- }
- // write layer metadata
- for _, t := range llm.Tensors {
- if err := llm.writeString(f, t.Name); err != nil {
- return err
- }
- // the dimensions of the tensor
- dims := 1
- if t.Shape[1] > 0 {
- dims = 2
- }
- if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
- return err
- }
- for i := 0; i < dims; i++ {
- if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
- return err
- }
- }
- if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
- return err
- }
- }
- offset, terr := f.Seek(0, io.SeekCurrent)
- if terr != nil {
- return terr
- }
- slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
- if err := llm.writePadding(f, 32); err != nil {
- return err
- }
- var dataFile *os.File
- var currentFile string
- var err error
- for _, t := range llm.Tensors {
- if currentFile != t.FileName {
- if dataFile != nil {
- dataFile.Close()
- }
- currentFile = t.FileName
- dataFile, err = os.Open(t.FileName)
- if err != nil {
- fmt.Println(err)
- return err
- }
- }
- if _, err := dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), io.SeekStart); err != nil {
- return err
- }
- pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
- re, err := regexp.Compile(pattern)
- if err != nil {
- return err
- }
- matches := re.FindAllStringSubmatch(t.Name, -1)
- if len(matches) > 0 {
- layerSize := t.FileOffsets[1] - t.FileOffsets[0]
- var err error
- tData := make([]uint16, layerSize/2)
- if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
- return err
- }
- layerType := matches[0][re.SubexpIndex("layer")]
- var heads uint32
- switch layerType {
- case "q":
- heads = llm.KV["llama.attention.head_count"].(uint32)
- case "k":
- heads = llm.KV["llama.attention.head_count_kv"].(uint32)
- if heads == 0 {
- heads = llm.KV["llama.attention.head_count"].(uint32)
- }
- }
- tData, err = t.Repack(tData, int(heads))
- if err != nil {
- return err
- }
- var buf []byte
- for _, n := range tData {
- buf = binary.LittleEndian.AppendUint16(buf, n)
- }
- tempBuf := make([]uint16, len(tData))
- tDataF32 := bfloat16.DecodeFloat32(buf)
- for cnt, v := range tDataF32 {
- tDataF16 := float16.Fromfloat32(v)
- tempBuf[cnt] = uint16(tDataF16)
- }
- if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
- return err
- }
- if err := llm.writePadding(f, 32); err != nil {
- return err
- }
- continue
- }
- remaining := t.FileOffsets[1] - t.FileOffsets[0]
- bufSize := uint64(10240)
- var finished bool
- for {
- data := make([]byte, min(bufSize, remaining))
- b, err := io.ReadFull(dataFile, data)
- remaining -= uint64(b)
- if errors.Is(err, io.EOF) || remaining <= 0 {
- finished = true
- } else if err != nil {
- return err
- }
- // convert bfloat16 -> ieee float32
- tDataF32 := bfloat16.DecodeFloat32(data)
- switch t.Kind {
- case 0:
- if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
- return err
- }
- case 1:
- // convert float32 -> float16
- tempBuf := make([]uint16, len(data)/2)
- for cnt, v := range tDataF32 {
- tDataF16 := float16.Fromfloat32(v)
- tempBuf[cnt] = uint16(tDataF16)
- }
- if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
- return err
- }
- }
- if finished {
- break
- }
- }
- if err := llm.writePadding(f, 32); err != nil {
- return err
- }
- }
- f.Close()
- return nil
- }
- func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
- // gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
- offset, err := f.Seek(0, io.SeekCurrent)
- if err != nil {
- return err
- }
- // round the current offset up to the next multiple of align
- nextOffset := ((offset + align - 1) / align) * align
- buf := make([]byte, nextOffset-offset)
- if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
- return err
- }
- return nil
- }
- func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
- if err := binary.Write(f, llm.ByteOrder, v); err != nil {
- return err
- }
- return nil
- }
- func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
- if err := binary.Write(f, llm.ByteOrder, v); err != nil {
- return err
- }
- return nil
- }
- func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
- if err := binary.Write(f, llm.ByteOrder, v); err != nil {
- return err
- }
- return nil
- }
- func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
- if err := binary.Write(f, llm.ByteOrder, b); err != nil {
- return err
- }
- return nil
- }
- func (llm *GGUFModel) writeString(f *os.File, s string) error {
- if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
- return err
- }
- if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
- return err
- }
- return nil
- }
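- // Decode reads the KV metadata and tensor descriptors from rs,
- // accumulates the total parameter count, and finally seeks past the
- // aligned tensor data without reading it.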
- func (llm *GGUFModel) Decode(rs io.ReadSeeker) error {
- // decode key-values
- for i := 0; uint64(i) < llm.NumKV(); i++ {
- k, err := llm.readString(rs)
- if err != nil {
- return err
- }
- vtype := llm.readU32(rs)
- var v any
- switch vtype {
- case GGUFTypeUint8:
- v = llm.readU8(rs)
- case GGUFTypeInt8:
- v = llm.readI8(rs)
- case GGUFTypeUint16:
- v = llm.readU16(rs)
- case GGUFTypeInt16:
- v = llm.readI16(rs)
- case GGUFTypeUint32:
- v = llm.readU32(rs)
- case GGUFTypeInt32:
- v = llm.readI32(rs)
- case GGUFTypeUint64:
- v = llm.readU64(rs)
- case GGUFTypeInt64:
- v = llm.readI64(rs)
- case GGUFTypeFloat32:
- v = llm.readF32(rs)
- case GGUFTypeFloat64:
- v = llm.readF64(rs)
- case GGUFTypeBool:
- v = llm.readBool(rs)
- case GGUFTypeString:
- s, err := llm.readString(rs)
- if err != nil {
- return err
- }
- v = s
- case GGUFTypeArray:
- a, err := llm.readArray(rs)
- if err != nil {
- return err
- }
- v = a
- default:
- return fmt.Errorf("invalid type: %d", vtype)
- }
- llm.KV[k] = v
- }
- // decode tensors
- for i := 0; uint64(i) < llm.NumTensor(); i++ {
- name, err := llm.readString(rs)
- if err != nil {
- return err
- }
- // dims is the number of dimensions in the tensor
- dims := llm.readU32(rs)
- shape := [4]uint64{1, 1, 1, 1}
- for i := 0; uint32(i) < dims; i++ {
- shape[i] = llm.readU64(rs)
- }
- tensor := Tensor{
- Name: name,
- Kind: llm.readU32(rs),
- Offset: llm.readU64(rs),
- Shape: shape[:],
- }
- llm.Tensors = append(llm.Tensors, tensor)
- llm.parameters += tensor.Parameters()
- }
- alignment, ok := llm.KV["general.alignment"].(uint32)
- if !ok {
- alignment = 32
- }
- offset, err := rs.Seek(0, io.SeekCurrent)
- if err != nil {
- return err
- }
- // skip the padding bytes (if any) between the metadata and the tensor data
- if _, err := rs.Seek((int64(alignment)-offset%int64(alignment))%int64(alignment), io.SeekCurrent); err != nil {
- return err
- }
- for _, tensor := range llm.Tensors {
- padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
- if _, err := rs.Seek(padded, io.SeekCurrent); err != nil {
- return err
- }
- }
- return nil
- }
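- // the accessors below read architecture-prefixed keys such as
- // "llama.block_count" and return 0 when the key is absent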
- func (llm *GGUFModel) NumLayers() uint32 {
- value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
- if !exists {
- return 0
- }
- return value.(uint32)
- }
- func (llm *GGUFModel) NumHead() uint32 {
- value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
- if !exists {
- return 0
- }
- return value.(uint32)
- }
- func (llm *GGUFModel) NumEmbed() uint32 {
- value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
- if !exists {
- return 0
- }
- return value.(uint32)
- }
- func (llm *GGUFModel) NumHeadKv() uint32 {
- value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
- if !exists {
- return 0
- }
- return value.(uint32)
- }
- func (llm *GGUFModel) NumCtx() uint32 {
- value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
- if !exists {
- return 0
- }
- return value.(uint32)
- }
- func (llm *GGUFModel) NumGQA() uint32 {
- numHeadKv := llm.NumHeadKv()
- if numHeadKv == 0 {
- return 0
- }
- return llm.NumHead() / numHeadKv
- }
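- // low-level readers: errors from binary.Read are not checked, so a
- // short or failed read simply yields the zero value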
- func (llm GGUFModel) readU8(r io.Reader) uint8 {
- var u8 uint8
- binary.Read(r, llm.ByteOrder, &u8)
- return u8
- }
- func (llm GGUFModel) readI8(r io.Reader) int8 {
- var i8 int8
- binary.Read(r, llm.ByteOrder, &i8)
- return i8
- }
- func (llm GGUFModel) readU16(r io.Reader) uint16 {
- var u16 uint16
- binary.Read(r, llm.ByteOrder, &u16)
- return u16
- }
- func (llm GGUFModel) readI16(r io.Reader) int16 {
- var i16 int16
- binary.Read(r, llm.ByteOrder, &i16)
- return i16
- }
- func (llm GGUFModel) readU32(r io.Reader) uint32 {
- var u32 uint32
- binary.Read(r, llm.ByteOrder, &u32)
- return u32
- }
- func (llm GGUFModel) readI32(r io.Reader) int32 {
- var i32 int32
- binary.Read(r, llm.ByteOrder, &i32)
- return i32
- }
- func (llm GGUFModel) readU64(r io.Reader) uint64 {
- var u64 uint64
- binary.Read(r, llm.ByteOrder, &u64)
- return u64
- }
- func (llm GGUFModel) readI64(r io.Reader) int64 {
- var i64 int64
- binary.Read(r, llm.ByteOrder, &i64)
- return i64
- }
- func (llm GGUFModel) readF32(r io.Reader) float32 {
- var f32 float32
- binary.Read(r, llm.ByteOrder, &f32)
- return f32
- }
- func (llm GGUFModel) readF64(r io.Reader) float64 {
- var f64 float64
- binary.Read(r, llm.ByteOrder, &f64)
- return f64
- }
- func (llm GGUFModel) readBool(r io.Reader) bool {
- var b bool
- binary.Read(r, llm.ByteOrder, &b)
- return b
- }
- func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
- var nameLength uint32
- binary.Read(r, llm.ByteOrder, &nameLength)
- var b bytes.Buffer
- if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
- return "", err
- }
- // gguf v1 strings are null-terminated
- b.Truncate(b.Len() - 1)
- return b.String(), nil
- }
- func (llm GGUFModel) readString(r io.Reader) (string, error) {
- if llm.Version == 1 {
- return llm.readStringV1(r)
- }
- var nameLength uint64
- binary.Read(r, llm.ByteOrder, &nameLength)
- var b bytes.Buffer
- if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
- return "", err
- }
- return b.String(), nil
- }
- func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
- atype := llm.readU32(r)
- n := llm.readU32(r)
- for i := 0; uint32(i) < n; i++ {
- switch atype {
- case GGUFTypeUint8:
- arr = append(arr, llm.readU8(r))
- case GGUFTypeInt8:
- arr = append(arr, llm.readI8(r))
- case GGUFTypeUint16:
- arr = append(arr, llm.readU16(r))
- case GGUFTypeInt16:
- arr = append(arr, llm.readI16(r))
- case GGUFTypeUint32:
- arr = append(arr, llm.readU32(r))
- case GGUFTypeInt32:
- arr = append(arr, llm.readI32(r))
- case GGUFTypeFloat32:
- arr = append(arr, llm.readF32(r))
- case GGUFTypeBool:
- arr = append(arr, llm.readBool(r))
- case GGUFTypeString:
- s, err := llm.readStringV1(r)
- if err != nil {
- return nil, err
- }
- arr = append(arr, s)
- default:
- return nil, fmt.Errorf("invalid array type: %d", atype)
- }
- }
- return
- }
- func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
- if llm.Version == 1 {
- return llm.readArrayV1(r)
- }
- atype := llm.readU32(r)
- n := llm.readU64(r)
- for i := 0; uint64(i) < n; i++ {
- switch atype {
- case GGUFTypeUint8:
- arr = append(arr, llm.readU8(r))
- case GGUFTypeInt8:
- arr = append(arr, llm.readI8(r))
- case GGUFTypeUint16:
- arr = append(arr, llm.readU16(r))
- case GGUFTypeInt16:
- arr = append(arr, llm.readI16(r))
- case GGUFTypeUint32:
- arr = append(arr, llm.readU32(r))
- case GGUFTypeInt32:
- arr = append(arr, llm.readI32(r))
- case GGUFTypeUint64:
- arr = append(arr, llm.readU64(r))
- case GGUFTypeInt64:
- arr = append(arr, llm.readI64(r))
- case GGUFTypeFloat32:
- arr = append(arr, llm.readF32(r))
- case GGUFTypeFloat64:
- arr = append(arr, llm.readF64(r))
- case GGUFTypeBool:
- arr = append(arr, llm.readBool(r))
- case GGUFTypeString:
- s, err := llm.readString(r)
- if err != nil {
- return nil, err
- }
- arr = append(arr, s)
- default:
- return nil, fmt.Errorf("invalid array type: %d", atype)
- }
- }
- return
- }
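- // openGGUF is a hypothetical usage sketch, not part of the original
- // file: it shows how ContainerGGUF and GGUFModel fit together when
- // reading metadata from a GGUF file on disk. The magic check and the
- // little-endian assumption are mine; the surrounding package may wire
- // this up differently.
- func openGGUF(path string) (model, error) {
- f, err := os.Open(path)
- if err != nil {
- return nil, err
- }
- defer f.Close()
- // verify the 4-byte magic before handing the reader to the container
- magic := make([]byte, 4)
- if _, err := io.ReadFull(f, magic); err != nil {
- return nil, err
- }
- if string(magic) != "GGUF" {
- return nil, fmt.Errorf("unexpected magic %q, want GGUF", magic)
- }
- c := &ContainerGGUF{ByteOrder: binary.LittleEndian}
- return c.Decode(f)
- }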