Prechádzať zdrojové kódy

recent llama.cpp update added kernels for fp32, q5_0, and q5_1

Michael Yang 1 rok pred
rodič
commit
19b7a4d715
1 zmenil súbory, kde vykonal 1 pridanie a 8 odobranie
  1. 1 8
      llm/llm.go

+ 1 - 8
llm/llm.go

@@ -41,20 +41,13 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 
 	if runtime.GOOS == "darwin" {
 		switch ggml.FileType() {
-		case "Q8_0":
+		case "F32", "Q5_0", "Q5_1", "Q8_0":
 			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
 				// Non-GGUF (legacy GGML) F32, Q5_0, Q5_1, and Q8_0 models do not support
 				// the Metal API and will cause the runner to segfault, so disable the GPU
 				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
 				opts.NumGPU = 0
 			}
-		case "F32", "Q5_0", "Q5_1":
-			if opts.NumGPU != 0 {
-				// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
-				// cause the runner to segmentation fault so disable GPU
-				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
-				opts.NumGPU = 0
-			}
 		}
 
 		var requiredMemory int64