ソースを参照

Implement linux NUMA detection

If the system has multiple numa nodes, enable numa support in llama.cpp
If we detect numactl in the path, use that, else use the basic "distribute" mode.
Daniel Hiltgen 9 ヶ月 前
コミット
f457d63400
3 ファイル変更29 行追加4 行削除
  1. 0 2
      api/types.go
  2. 21 0
      gpu/cpu_common.go
  3. 8 2
      llm/server.go

+ 0 - 2
api/types.go

@@ -231,7 +231,6 @@ type Options struct {
 
 // Runner options which must be set when the model is loaded into memory
 type Runner struct {
-	UseNUMA   bool  `json:"numa,omitempty"`
 	NumCtx    int   `json:"num_ctx,omitempty"`
 	NumBatch  int   `json:"num_batch,omitempty"`
 	NumGPU    int   `json:"num_gpu,omitempty"`
@@ -615,7 +614,6 @@ func DefaultOptions() Options {
 			F16KV:     true,
 			UseMLock:  false,
 			UseMMap:   nil,
-			UseNUMA:   false,
 		},
 	}
 }

+ 21 - 0
gpu/cpu_common.go

@@ -1,6 +1,11 @@
 package gpu
 
 import (
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
 	"golang.org/x/sys/cpu"
 )
 
@@ -14,3 +19,19 @@ func GetCPUCapability() CPUCapability {
 	// else LCD
 	return CPUCapabilityNone
 }
+
// IsNUMA reports whether the host appears to have more than one NUMA
// node, in which case NUMA support should be enabled in llama.cpp.
// NUMA handling in llama.cpp is implemented for Linux only, so this
// always returns false on other operating systems.
//
// NOTE(review): this counts distinct physical package IDs (sockets) as
// a proxy for NUMA nodes. A single socket can expose multiple NUMA
// nodes on some platforms; /sys/devices/system/node would be the exact
// source — confirm whether the socket-level approximation is intended.
func IsNUMA() bool {
	if runtime.GOOS != "linux" {
		// numa support in llama.cpp is linux only
		return false
	}
	// Collect the distinct physical package IDs across all CPUs.
	// struct{} values make this an idiomatic zero-width set.
	ids := map[string]struct{}{}
	packageIDs, _ := filepath.Glob("/sys/devices/system/cpu/cpu*/topology/physical_package_id")
	for _, packageID := range packageIDs {
		// Best effort: skip entries we cannot read rather than failing.
		if id, err := os.ReadFile(packageID); err == nil {
			ids[strings.TrimSpace(string(id))] = struct{}{}
		}
	}
	// More than one distinct package ID implies a multi-socket system.
	return len(ids) > 1
}

+ 8 - 2
llm/server.go

@@ -256,8 +256,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		params = append(params, "--mlock")
 	}
 
-	if opts.UseNUMA {
-		params = append(params, "--numa")
+	if gpu.IsNUMA() {
+		numaMode := "distribute"
+		if runtime.GOOS == "linux" {
+			if _, err := exec.LookPath("numactl"); err == nil {
+				numaMode = "numactl"
+			}
+		}
+		params = append(params, "--numa", numaMode)
 	}
 
 	params = append(params, "--parallel", strconv.Itoa(numParallel))