Pārlūkot izejas kodu

add gguf file types (#2532)

Michael Yang 1 gadu atpakaļ
vecāks
revīzija
949d7b1c48
2 mainīti faili ar 23 papildinājumiem un 0 dzēšanām
  1. 15 0
      llm/ggml.go
  2. 8 0
      llm/gguf.go

+ 15 - 0
llm/ggml.go

@@ -31,6 +31,11 @@ const (
 	fileTypeQ5_K_S
 	fileTypeQ5_K_S
 	fileTypeQ5_K_M
 	fileTypeQ5_K_M
 	fileTypeQ6_K
 	fileTypeQ6_K
+	fileTypeIQ2_XXS
+	fileTypeIQ2_XS
+	fileTypeQ2_K_S
+	fileTypeQ3_K_XS
+	fileTypeIQ3_XXS
 )
 )
 
 
 func fileType(fileType uint32) string {
 func fileType(fileType uint32) string {
@@ -69,6 +74,16 @@ func fileType(fileType uint32) string {
 		return "Q5_K_M"
 		return "Q5_K_M"
 	case fileTypeQ6_K:
 	case fileTypeQ6_K:
 		return "Q6_K"
 		return "Q6_K"
+	case fileTypeIQ2_XXS:
+		return "IQ2_XXS"
+	case fileTypeIQ2_XS:
+		return "IQ2_XS"
+	case fileTypeQ2_K_S:
+		return "Q2_K_S"
+	case fileTypeQ3_K_XS:
+		return "Q3_K_XS"
+	case fileTypeIQ3_XXS:
+		return "IQ3_XXS"
 	default:
 	default:
 		return "unknown"
 		return "unknown"
 	}
 	}

+ 8 - 0
llm/gguf.go

@@ -115,6 +115,14 @@ func (t tensor) typeSize() uint64 {
 		return 2 + 2 + 12 + blockSize/8 + blockSize/2
 		return 2 + 2 + 12 + blockSize/8 + blockSize/2
 	case 14: // Q6_K
 	case 14: // Q6_K
 		return blockSize/2 + blockSize/4 + blockSize/16 + 2
 		return blockSize/2 + blockSize/4 + blockSize/16 + 2
+	case 15: // Q8_K
+		return 2 + blockSize + 2*blockSize/16
+	case 16: // IQ2_XXS
+		return 2 + 2*blockSize/8
+	case 17: // IQ2_XS
+		return 2 + 2*blockSize/8 + blockSize/32
+	case 18: // IQ3_XXS
+		return 2 + 3*blockSize/8
 	default:
 	default:
 		return 0
 		return 0
 	}
 	}