Преглед изворни кода

ggufv3

ggufv3 adds support for big-endian byte order, mainly for the s390x architecture.
while that's not currently supported for ollama, the change is simple.

loosen version check to be more forward compatible. unless specified,
gguf versions other than v1 will be decoded into v2.
Michael Yang пре 1 година
родитељ
комит
125d0a013a
2 измењених фајлова са 36 додато и 34 уклоњено
  1. 6 3
      llm/ggml.go
  2. 30 31
      llm/gguf.go

+ 6 - 3
llm/ggml.go

@@ -175,7 +175,8 @@ const (
 	// Magic constant for `ggla` files (LoRA adapter).
 	// Magic constant for `ggla` files (LoRA adapter).
 	FILE_MAGIC_GGLA = 0x67676C61
 	FILE_MAGIC_GGLA = 0x67676C61
 	// Magic constant for `gguf` files (versioned, gguf)
 	// Magic constant for `gguf` files (versioned, gguf)
-	FILE_MAGIC_GGUF = 0x46554747
+	FILE_MAGIC_GGUF_LE = 0x46554747
+	FILE_MAGIC_GGUF_BE = 0x47475546
 )
 )
 
 
 func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
@@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 		ggml.container = &containerGGJT{}
 		ggml.container = &containerGGJT{}
 	case FILE_MAGIC_GGLA:
 	case FILE_MAGIC_GGLA:
 		ggml.container = &containerLORA{}
 		ggml.container = &containerLORA{}
-	case FILE_MAGIC_GGUF:
-		ggml.container = &containerGGUF{}
+	case FILE_MAGIC_GGUF_LE:
+		ggml.container = &containerGGUF{bo: binary.LittleEndian}
+	case FILE_MAGIC_GGUF_BE:
+		ggml.container = &containerGGUF{bo: binary.BigEndian}
 	default:
 	default:
 		return nil, errors.New("invalid file magic")
 		return nil, errors.New("invalid file magic")
 	}
 	}

+ 30 - 31
llm/gguf.go

@@ -3,12 +3,13 @@ package llm
 import (
 import (
 	"bytes"
 	"bytes"
 	"encoding/binary"
 	"encoding/binary"
-	"errors"
 	"fmt"
 	"fmt"
 	"io"
 	"io"
 )
 )
 
 
 type containerGGUF struct {
 type containerGGUF struct {
+	bo binary.ByteOrder
+
 	Version uint32
 	Version uint32
 
 
 	V1 struct {
 	V1 struct {
@@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string {
 }
 }
 
 
 func (c *containerGGUF) Decode(r io.Reader) (model, error) {
 func (c *containerGGUF) Decode(r io.Reader) (model, error) {
-	binary.Read(r, binary.LittleEndian, &c.Version)
+	binary.Read(r, c.bo, &c.Version)
 
 
 	switch c.Version {
 	switch c.Version {
 	case 1:
 	case 1:
-		binary.Read(r, binary.LittleEndian, &c.V1)
-	case 2:
-		binary.Read(r, binary.LittleEndian, &c.V2)
+		binary.Read(r, c.bo, &c.V1)
 	default:
 	default:
-		return nil, errors.New("invalid version")
+		binary.Read(r, c.bo, &c.V2)
 	}
 	}
 
 
 	model := newGGUFModel(c)
 	model := newGGUFModel(c)
@@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 {
 	return int64(v)
 	return int64(v)
 }
 }
 
 
-func (ggufModel) readU8(r io.Reader) uint8 {
+func (llm ggufModel) readU8(r io.Reader) uint8 {
 	var u8 uint8
 	var u8 uint8
-	binary.Read(r, binary.LittleEndian, &u8)
+	binary.Read(r, llm.bo, &u8)
 	return u8
 	return u8
 }
 }
 
 
-func (ggufModel) readI8(r io.Reader) int8 {
+func (llm ggufModel) readI8(r io.Reader) int8 {
 	var i8 int8
 	var i8 int8
-	binary.Read(r, binary.LittleEndian, &i8)
+	binary.Read(r, llm.bo, &i8)
 	return i8
 	return i8
 }
 }
 
 
-func (ggufModel) readU16(r io.Reader) uint16 {
+func (llm ggufModel) readU16(r io.Reader) uint16 {
 	var u16 uint16
 	var u16 uint16
-	binary.Read(r, binary.LittleEndian, &u16)
+	binary.Read(r, llm.bo, &u16)
 	return u16
 	return u16
 }
 }
 
 
-func (ggufModel) readI16(r io.Reader) int16 {
+func (llm ggufModel) readI16(r io.Reader) int16 {
 	var i16 int16
 	var i16 int16
-	binary.Read(r, binary.LittleEndian, &i16)
+	binary.Read(r, llm.bo, &i16)
 	return i16
 	return i16
 }
 }
 
 
-func (ggufModel) readU32(r io.Reader) uint32 {
+func (llm ggufModel) readU32(r io.Reader) uint32 {
 	var u32 uint32
 	var u32 uint32
-	binary.Read(r, binary.LittleEndian, &u32)
+	binary.Read(r, llm.bo, &u32)
 	return u32
 	return u32
 }
 }
 
 
-func (ggufModel) readI32(r io.Reader) int32 {
+func (llm ggufModel) readI32(r io.Reader) int32 {
 	var i32 int32
 	var i32 int32
-	binary.Read(r, binary.LittleEndian, &i32)
+	binary.Read(r, llm.bo, &i32)
 	return i32
 	return i32
 }
 }
 
 
-func (ggufModel) readU64(r io.Reader) uint64 {
+func (llm ggufModel) readU64(r io.Reader) uint64 {
 	var u64 uint64
 	var u64 uint64
-	binary.Read(r, binary.LittleEndian, &u64)
+	binary.Read(r, llm.bo, &u64)
 	return u64
 	return u64
 }
 }
 
 
-func (ggufModel) readI64(r io.Reader) int64 {
+func (llm ggufModel) readI64(r io.Reader) int64 {
 	var i64 int64
 	var i64 int64
-	binary.Read(r, binary.LittleEndian, &i64)
+	binary.Read(r, llm.bo, &i64)
 	return i64
 	return i64
 }
 }
 
 
-func (ggufModel) readF32(r io.Reader) float32 {
+func (llm ggufModel) readF32(r io.Reader) float32 {
 	var f32 float32
 	var f32 float32
-	binary.Read(r, binary.LittleEndian, &f32)
+	binary.Read(r, llm.bo, &f32)
 	return f32
 	return f32
 }
 }
 
 
-func (ggufModel) readF64(r io.Reader) float64 {
+func (llm ggufModel) readF64(r io.Reader) float64 {
 	var f64 float64
 	var f64 float64
-	binary.Read(r, binary.LittleEndian, &f64)
+	binary.Read(r, llm.bo, &f64)
 	return f64
 	return f64
 }
 }
 
 
-func (ggufModel) readBool(r io.Reader) bool {
+func (llm ggufModel) readBool(r io.Reader) bool {
 	var b bool
 	var b bool
-	binary.Read(r, binary.LittleEndian, &b)
+	binary.Read(r, llm.bo, &b)
 	return b
 	return b
 }
 }
 
 
-func (ggufModel) readStringV1(r io.Reader) (string, error) {
+func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
 	var nameLength uint32
 	var nameLength uint32
-	binary.Read(r, binary.LittleEndian, &nameLength)
+	binary.Read(r, llm.bo, &nameLength)
 
 
 	var b bytes.Buffer
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
 
 
 func (llm ggufModel) readString(r io.Reader) (string, error) {
 func (llm ggufModel) readString(r io.Reader) (string, error) {
 	var nameLength uint64
 	var nameLength uint64
-	binary.Read(r, binary.LittleEndian, &nameLength)
+	binary.Read(r, llm.bo, &nameLength)
 
 
 	var b bytes.Buffer
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {