소스 검색

ggufv3

ggufv3 adds support for big endianness, mainly for s390x architecture.
while that's not currently supported for ollama, the change is simple.

loosen version check to be more forward compatible. unless specified,
gguf versions other v1 will be decoded into v2.
Michael Yang 1 년 전
부모
커밋
125d0a013a
2개의 변경된 파일36개의 추가작업 그리고 34개의 파일을 삭제
  1. 6 3
      llm/ggml.go
  2. 30 31
      llm/gguf.go

+ 6 - 3
llm/ggml.go

@@ -175,7 +175,8 @@ const (
 	// Magic constant for `ggla` files (LoRA adapter).
 	FILE_MAGIC_GGLA = 0x67676C61
 	// Magic constant for `gguf` files (versioned, gguf)
-	FILE_MAGIC_GGUF = 0x46554747
+	FILE_MAGIC_GGUF_LE = 0x46554747
+	FILE_MAGIC_GGUF_BE = 0x47475546
 )
 
 func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
@@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
 		ggml.container = &containerGGJT{}
 	case FILE_MAGIC_GGLA:
 		ggml.container = &containerLORA{}
-	case FILE_MAGIC_GGUF:
-		ggml.container = &containerGGUF{}
+	case FILE_MAGIC_GGUF_LE:
+		ggml.container = &containerGGUF{bo: binary.LittleEndian}
+	case FILE_MAGIC_GGUF_BE:
+		ggml.container = &containerGGUF{bo: binary.BigEndian}
 	default:
 		return nil, errors.New("invalid file magic")
 	}

+ 30 - 31
llm/gguf.go

@@ -3,12 +3,13 @@ package llm
 import (
 	"bytes"
 	"encoding/binary"
-	"errors"
 	"fmt"
 	"io"
 )
 
 type containerGGUF struct {
+	bo binary.ByteOrder
+
 	Version uint32
 
 	V1 struct {
@@ -27,15 +28,13 @@ func (c *containerGGUF) Name() string {
 }
 
 func (c *containerGGUF) Decode(r io.Reader) (model, error) {
-	binary.Read(r, binary.LittleEndian, &c.Version)
+	binary.Read(r, c.bo, &c.Version)
 
 	switch c.Version {
 	case 1:
-		binary.Read(r, binary.LittleEndian, &c.V1)
-	case 2:
-		binary.Read(r, binary.LittleEndian, &c.V2)
+		binary.Read(r, c.bo, &c.V1)
 	default:
-		return nil, errors.New("invalid version")
+		binary.Read(r, c.bo, &c.V2)
 	}
 
 	model := newGGUFModel(c)
@@ -209,75 +208,75 @@ func (llm *ggufModel) NumLayers() int64 {
 	return int64(v)
 }
 
-func (ggufModel) readU8(r io.Reader) uint8 {
+func (llm ggufModel) readU8(r io.Reader) uint8 {
 	var u8 uint8
-	binary.Read(r, binary.LittleEndian, &u8)
+	binary.Read(r, llm.bo, &u8)
 	return u8
 }
 
-func (ggufModel) readI8(r io.Reader) int8 {
+func (llm ggufModel) readI8(r io.Reader) int8 {
 	var i8 int8
-	binary.Read(r, binary.LittleEndian, &i8)
+	binary.Read(r, llm.bo, &i8)
 	return i8
 }
 
-func (ggufModel) readU16(r io.Reader) uint16 {
+func (llm ggufModel) readU16(r io.Reader) uint16 {
 	var u16 uint16
-	binary.Read(r, binary.LittleEndian, &u16)
+	binary.Read(r, llm.bo, &u16)
 	return u16
 }
 
-func (ggufModel) readI16(r io.Reader) int16 {
+func (llm ggufModel) readI16(r io.Reader) int16 {
 	var i16 int16
-	binary.Read(r, binary.LittleEndian, &i16)
+	binary.Read(r, llm.bo, &i16)
 	return i16
 }
 
-func (ggufModel) readU32(r io.Reader) uint32 {
+func (llm ggufModel) readU32(r io.Reader) uint32 {
 	var u32 uint32
-	binary.Read(r, binary.LittleEndian, &u32)
+	binary.Read(r, llm.bo, &u32)
 	return u32
 }
 
-func (ggufModel) readI32(r io.Reader) int32 {
+func (llm ggufModel) readI32(r io.Reader) int32 {
 	var i32 int32
-	binary.Read(r, binary.LittleEndian, &i32)
+	binary.Read(r, llm.bo, &i32)
 	return i32
 }
 
-func (ggufModel) readU64(r io.Reader) uint64 {
+func (llm ggufModel) readU64(r io.Reader) uint64 {
 	var u64 uint64
-	binary.Read(r, binary.LittleEndian, &u64)
+	binary.Read(r, llm.bo, &u64)
 	return u64
 }
 
-func (ggufModel) readI64(r io.Reader) int64 {
+func (llm ggufModel) readI64(r io.Reader) int64 {
 	var i64 int64
-	binary.Read(r, binary.LittleEndian, &i64)
+	binary.Read(r, llm.bo, &i64)
 	return i64
 }
 
-func (ggufModel) readF32(r io.Reader) float32 {
+func (llm ggufModel) readF32(r io.Reader) float32 {
 	var f32 float32
-	binary.Read(r, binary.LittleEndian, &f32)
+	binary.Read(r, llm.bo, &f32)
 	return f32
 }
 
-func (ggufModel) readF64(r io.Reader) float64 {
+func (llm ggufModel) readF64(r io.Reader) float64 {
 	var f64 float64
-	binary.Read(r, binary.LittleEndian, &f64)
+	binary.Read(r, llm.bo, &f64)
 	return f64
 }
 
-func (ggufModel) readBool(r io.Reader) bool {
+func (llm ggufModel) readBool(r io.Reader) bool {
 	var b bool
-	binary.Read(r, binary.LittleEndian, &b)
+	binary.Read(r, llm.bo, &b)
 	return b
 }
 
-func (ggufModel) readStringV1(r io.Reader) (string, error) {
+func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
 	var nameLength uint32
-	binary.Read(r, binary.LittleEndian, &nameLength)
+	binary.Read(r, llm.bo, &nameLength)
 
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -292,7 +291,7 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
 
 func (llm ggufModel) readString(r io.Reader) (string, error) {
 	var nameLength uint64
-	binary.Read(r, binary.LittleEndian, &nameLength)
+	binary.Read(r, llm.bo, &nameLength)
 
 	var b bytes.Buffer
 	if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {