瀏覽代碼

Fix up the CPU fallback selection

The memory changes and the multi-variant change had some merge
glitches I missed. This fixes them so we actually get the CPU LLM
library and the best variant for the given system.
Daniel Hiltgen 1 年之前
父節點
當前提交
7427fa1387
共有 4 個文件被更改,包括 30 次插入和 16 次删除
  1. 1 1
      gpu/gpu_darwin.go
  2. 9 6
      llm/llm.go
  3. 7 0
      llm/payload_common.go
  4. 13 9
      llm/payload_test.go

+ 1 - 1
gpu/gpu_darwin.go

@@ -34,7 +34,7 @@ func GetGPUInfo() GpuInfo {
 	mem, _ := getCPUMem()
 	if runtime.GOARCH == "amd64" {
 		return GpuInfo{
-			Library: "default",
+			Library: "cpu",
 			Variant: GetCPUVariant(),
 			memInfo: mem,
 		}

+ 9 - 6
llm/llm.go

@@ -2,6 +2,7 @@ package llm
 
 import (
 	"context"
+	"fmt"
 	"log"
 	"os"
 	"runtime"
@@ -50,7 +51,6 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	graph := int64(ggml.NumGQA()) * kv / 6
 
 	info := gpu.GetGPUInfo()
-	library := info.Library
 	switch runtime.GOOS {
 	case "darwin":
 		if opts.NumGPU == 0 {
@@ -59,13 +59,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 
 		if size+kv+graph > vram {
 			log.Println("not enough vram available, falling back to CPU only")
+			info.Library = "cpu"
+			info.Variant = gpu.GetCPUVariant()
 			opts.NumGPU = 0
 			break
 		}
 
 		opts.NumGPU = 1
 	default:
-		if library == "cpu" || library == "default" {
+		if info.Library == "cpu" {
 			log.Println("GPU not available, falling back to CPU")
 			opts.NumGPU = 0
 			break
@@ -73,7 +75,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 
 		// don't use GPU at all if no layers are loaded
 		if opts.NumGPU == 0 {
-			library = "cpu"
+			info.Library = "cpu"
+			info.Variant = gpu.GetCPUVariant()
 			break
 		}
 
@@ -100,7 +103,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 		min := graph + kv*layers/maxlayers
 		if layers <= 0 || min > avg {
 			log.Printf("not enough vram available, falling back to CPU only")
-			library = "cpu"
+			info.Library = "cpu"
+			info.Variant = gpu.GetCPUVariant()
 			opts.NumGPU = 0
 			break
 		}
@@ -110,8 +114,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 
 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
-	gpuInfo := gpu.GetGPUInfo()
-	return newLlmServer(gpuInfo, model, adapters, projectors, opts)
+	return newLlmServer(info, model, adapters, projectors, opts)
 }
 
 // Give any native cgo implementations an opportunity to initialize

+ 7 - 0
llm/payload_common.go

@@ -28,6 +28,13 @@ func getDynLibs(gpuInfo gpu.GpuInfo) []string {
 	if gpuInfo.Library == "default" {
 		return []string{"default"}
 	}
+	// TODO - temporary until we have multiple CPU variations for Darwin
+	// Short circuit on darwin with metal only
+	if len(availableDynLibs) == 1 {
+		if _, onlyMetal := availableDynLibs["metal"]; onlyMetal {
+			return []string{availableDynLibs["metal"]}
+		}
+	}
 
 	exactMatch := ""
 	dynLibs := []string{}

+ 13 - 9
llm/payload_test.go

@@ -16,39 +16,43 @@ func TestGetDynLibs(t *testing.T) {
 	assert.Len(t, res, 1)
 	assert.Equal(t, availableDynLibs["cpu"], res[0])
 
+	variant := gpu.GetCPUVariant()
+	if variant != "" {
+		variant = "_" + variant
+	}
 	availableDynLibs = map[string]string{
-		"rocm_v5": "X_rocm_v5",
-		"rocm_v6": "X_rocm_v6",
-		"cpu":     "X_cpu",
+		"rocm_v5":       "X_rocm_v5",
+		"rocm_v6":       "X_rocm_v6",
+		"cpu" + variant: "X_cpu",
 	}
 	assert.Equal(t, true, rocmDynLibPresent())
 	res = getDynLibs(gpu.GpuInfo{Library: "rocm"})
 	assert.Len(t, res, 3)
 	assert.Equal(t, availableDynLibs["rocm_v5"], res[0])
 	assert.Equal(t, availableDynLibs["rocm_v6"], res[1])
-	assert.Equal(t, availableDynLibs["cpu"], res[2])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[2])
 
 	res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
 	assert.Len(t, res, 3)
 	assert.Equal(t, availableDynLibs["rocm_v6"], res[0])
 	assert.Equal(t, availableDynLibs["rocm_v5"], res[1])
-	assert.Equal(t, availableDynLibs["cpu"], res[2])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[2])
 
 	res = getDynLibs(gpu.GpuInfo{Library: "cuda"})
 	assert.Len(t, res, 1)
-	assert.Equal(t, availableDynLibs["cpu"], res[0])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[0])
 
 	res = getDynLibs(gpu.GpuInfo{Library: "default"})
 	assert.Len(t, res, 1)
 	assert.Equal(t, "default", res[0])
 
 	availableDynLibs = map[string]string{
-		"rocm": "X_rocm_v5",
-		"cpu":  "X_cpu",
+		"rocm":          "X_rocm_v5",
+		"cpu" + variant: "X_cpu",
 	}
 	assert.Equal(t, true, rocmDynLibPresent())
 	res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
 	assert.Len(t, res, 2)
 	assert.Equal(t, availableDynLibs["rocm"], res[0])
-	assert.Equal(t, availableDynLibs["cpu"], res[1])
+	assert.Equal(t, availableDynLibs["cpu"+variant], res[1])
 }