hai 6 meses · 24636dfa87
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -229,7 +229,10 @@ func GetGPUInfo() GpuInfoList {
 
															 			slog.Warn("error looking up system memory", "error", err)
														
 
															 		}
														
 
															 		depPath := LibraryDir()
														
 
															-
														
 
															+		details, err := GetCPUDetails()
														
 
															+		if err != nil {
														
 
															+			slog.Warn("failed to lookup CPU details", "error", err)
														
 
															+		}
														
 
															 		cpus = []CPUInfo{
														
 
															 			{
														
 
															 				GpuInfo: GpuInfo{
														
@@ -239,6 +242,7 @@ func GetGPUInfo() GpuInfoList {
 
															 					ID:             "0",
														
 
															 					DependencyPath: depPath,
														
 
															 				},
														
 
															+				CPUs: details,
														
 
															 			},
														
 
															 		}
														
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -10,7 +10,9 @@ package gpu
 
															 import "C"
														
 
															 import (
														
 
															+	"log/slog"
														
 
															 	"runtime"
														
 
															+	"syscall"
														
 
															 	"github.com/ollama/ollama/format"
														
 
															 )
														
@@ -69,11 +71,30 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 
															 func GetSystemInfo() SystemInfo {
														
 
															 	mem, _ := GetCPUMem()
														
 
															+	query := "hw.perflevel0.physicalcpu"
														
 
															+	perfCores, err := syscall.SysctlUint32(query)
														
 
															+	if err != nil {
														
 
															+		slog.Warn("failed to discover physical CPU details", "query", query, "error", err)
														
 
															+	}
														
 
															+	query = "hw.perflevel1.physicalcpu"
														
 
															+	efficiencyCores, _ := syscall.SysctlUint32(query) // On x86 xeon this wont return data
														
 
															+
														
 
															+	// Determine thread count
														
 
															+	query = "hw.logicalcpu"
														
 
															+	logicalCores, _ := syscall.SysctlUint32(query)
														
 
															+
														
 
															 	return SystemInfo{
														
 
															 		System: CPUInfo{
														
 
															 			GpuInfo: GpuInfo{
														
 
															 				memInfo: mem,
														
 
															 			},
														
 
															+			CPUs: []CPU{
														
 
															+				{
														
 
															+					CoreCount:           int(perfCores + efficiencyCores),
														
 
															+					EfficiencyCoreCount: int(efficiencyCores),
														
 
															+					ThreadCount:         int(logicalCores),
														
 
															+				},
														
 
															+			},
														
 
															 		},
														
 
															 		GPUs: GetGPUInfo(),
														
 
															 	}
														
--- a/gpu/gpu_linux.go
+++ b/gpu/gpu_linux.go
@@ -4,6 +4,8 @@ import (
 
															 	"bufio"
														
 
															 	"fmt"
														
 
															 	"os"
														
 
															+	"reflect"
														
 
															+	"regexp"
														
 
															 	"strings"
														
 
															 	"github.com/ollama/ollama/format"
														
@@ -90,3 +92,95 @@ func GetCPUMem() (memInfo, error) {
 
															 	}
														
 
															 	return mem, nil
														
 
															 }
														
 
															+
														
 
															+const CpuInfoFilename = "/proc/cpuinfo"
														
 
															+
														
 
															+type linuxCpuInfo struct {
														
 
															+	ID         string `cpuinfo:"processor"`
														
 
															+	VendorID   string `cpuinfo:"vendor_id"`
														
 
															+	ModelName  string `cpuinfo:"model name"`
														
 
															+	PhysicalID string `cpuinfo:"physical id"`
														
 
															+	Siblings   string `cpuinfo:"siblings"`
														
 
															+	CoreID     string `cpuinfo:"core id"`
														
 
															+}
														
 
															+
														
 
															+func GetCPUDetails() ([]CPU, error) {
														
 
															+	file, err := os.Open(CpuInfoFilename)
														
 
															+	if err != nil {
														
 
															+		return nil, err
														
 
															+	}
														
 
															+	reColumns := regexp.MustCompile("\t+: ")
														
 
															+	scanner := bufio.NewScanner(file)
														
 
															+	cpuInfos := []linuxCpuInfo{}
														
 
															+	cpu := &linuxCpuInfo{}
														
 
															+	for scanner.Scan() {
														
 
															+		line := scanner.Text()
														
 
															+		if sl := reColumns.Split(line, 2); len(sl) > 1 {
														
 
															+			t := reflect.TypeOf(cpu).Elem()
														
 
															+			s := reflect.ValueOf(cpu).Elem()
														
 
															+			for i := range t.NumField() {
														
 
															+				field := t.Field(i)
														
 
															+				tag := field.Tag.Get("cpuinfo")
														
 
															+				if tag == sl[0] {
														
 
															+					s.FieldByName(field.Name).SetString(sl[1])
														
 
															+					break
														
 
															+				}
														
 
															+			}
														
 
															+		} else if strings.TrimSpace(line) == "" && cpu.ID != "" {
														
 
															+			cpuInfos = append(cpuInfos, *cpu)
														
 
															+			cpu = &linuxCpuInfo{}
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	// Process the sockets/cores/threads
														
 
															+	socketByID := map[string]*CPU{}
														
 
															+	coreBySocket := map[string]map[string]struct{}{}
														
 
															+	threadsByCoreBySocket := map[string]map[string]int{}
														
 
															+	for _, c := range cpuInfos {
														
 
															+		if _, found := socketByID[c.PhysicalID]; !found {
														
 
															+			socketByID[c.PhysicalID] = &CPU{
														
 
															+				ID:        c.PhysicalID,
														
 
															+				VendorID:  c.VendorID,
														
 
															+				ModelName: c.ModelName,
														
 
															+			}
														
 
															+			coreBySocket[c.PhysicalID] = map[string]struct{}{}
														
 
															+			threadsByCoreBySocket[c.PhysicalID] = map[string]int{}
														
 
															+		}
														
 
															+		if c.CoreID != "" {
														
 
															+			coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID] = struct{}{}
														
 
															+			threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID]++
														
 
															+		} else {
														
 
															+			coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID] = struct{}{}
														
 
															+			threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID]++
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	// Tally up the values from the tracking maps
														
 
															+	for id, s := range socketByID {
														
 
															+		s.CoreCount = len(coreBySocket[id])
														
 
															+		s.ThreadCount = 0
														
 
															+		for _, tc := range threadsByCoreBySocket[id] {
														
 
															+			s.ThreadCount += tc
														
 
															+		}
														
 
															+
														
 
															+		// This only works if HT is enabled, consider a more reliable model, maybe cache size comparisons?
														
 
															+		efficiencyCoreCount := 0
														
 
															+		for _, threads := range threadsByCoreBySocket[id] {
														
 
															+			if threads == 1 {
														
 
															+				efficiencyCoreCount++
														
 
															+			}
														
 
															+		}
														
 
															+		if efficiencyCoreCount == s.CoreCount {
														
 
															+			// 1:1 mapping means they're not actually efficiency cores, but regular cores
														
 
															+			s.EfficiencyCoreCount = 0
														
 
															+		} else {
														
 
															+			s.EfficiencyCoreCount = efficiencyCoreCount
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	result := []CPU{}
														
 
															+	for _, c := range socketByID {
														
 
															+		result = append(result, *c)
														
 
															+	}
														
 
															+	return result, nil
														
 
															+}
														
--- a/gpu/gpu_windows.go
+++ b/gpu/gpu_windows.go
@@ -2,6 +2,7 @@ package gpu
 
															 import (
														
 
															 	"fmt"
														
 
															+	"log/slog"
														
 
															 	"syscall"
														
 
															 	"unsafe"
														
 
															 )
														
@@ -19,9 +20,10 @@ type MEMORYSTATUSEX struct {
 
															 }
														
 
															 var (
														
 
															-	k32                      = syscall.NewLazyDLL("kernel32.dll")
														
 
															-	globalMemoryStatusExProc = k32.NewProc("GlobalMemoryStatusEx")
														
 
															-	sizeofMemoryStatusEx     = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
														
 
															+	k32                              = syscall.NewLazyDLL("kernel32.dll")
														
 
															+	globalMemoryStatusExProc         = k32.NewProc("GlobalMemoryStatusEx")
														
 
															+	sizeofMemoryStatusEx             = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
														
 
															+	GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
														
 
															 )
														
 
															 var CudartGlobs = []string{
														
@@ -55,3 +57,178 @@ func GetCPUMem() (memInfo, error) {
 
															 	}
														
 
															 	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
														
 
															 }
														
 
															+
														
 
															+type LOGICAL_PROCESSOR_RELATIONSHIP uint32
														
 
															+
														
 
															+const (
														
 
															+	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
														
 
															+	RelationNumaNode
														
 
															+	RelationCache
														
 
															+	RelationProcessorPackage
														
 
															+	RelationGroup
														
 
															+	RelationProcessorDie
														
 
															+	RelationNumaNodeEx
														
 
															+	RelationProcessorModule
														
 
															+)
														
 
															+const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
														
 
															+
														
 
															+type GROUP_AFFINITY struct {
														
 
															+	Mask     uintptr // KAFFINITY
														
 
															+	Group    uint16
														
 
															+	Reserved [3]uint16
														
 
															+}
														
 
															+
														
 
															+type PROCESSOR_RELATIONSHIP struct {
														
 
															+	Flags           byte
														
 
															+	EfficiencyClass byte
														
 
															+	Reserved        [20]byte
														
 
															+	GroupCount      uint16
														
 
															+	GroupMask       [1]GROUP_AFFINITY // len GroupCount
														
 
															+}
														
 
															+
														
 
															+// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP
														
 
															+
														
 
															+type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
														
 
															+	Relationship LOGICAL_PROCESSOR_RELATIONSHIP
														
 
															+	Size         uint32
														
 
															+	U            [1]byte // Union len Size
														
 
															+	// PROCESSOR_RELATIONSHIP
														
 
															+	// NUMA_NODE_RELATIONSHIP
														
 
															+	// CACHE_RELATIONSHIP
														
 
															+	// GROUP_RELATIONSHIP
														
 
															+}
														
 
															+
														
 
															+func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
														
 
															+	if group == nil || target == nil {
														
 
															+		return false
														
 
															+	}
														
 
															+	return group.Mask&target.Mask != 0
														
 
															+}
														
 
															+
														
 
															+type winPackage struct {
														
 
															+	groups              []*GROUP_AFFINITY
														
 
															+	coreCount           int // performance cores = coreCount - efficiencyCoreCount
														
 
															+	efficiencyCoreCount int
														
 
															+	threadCount         int
														
 
															+}
														
 
															+
														
 
															+func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
														
 
															+	for _, group := range pkg.groups {
														
 
															+		if group.IsMember(target) {
														
 
															+			return true
														
 
															+		}
														
 
															+	}
														
 
															+	return false
														
 
															+}
														
 
															+
														
 
															+func getLogicalProcessorInformationEx() ([]byte, error) {
														
 
															+	buf := make([]byte, 1)
														
 
															+	bufSize := len(buf)
														
 
															+	ret, _, err := GetLogicalProcessorInformationEx.Call(
														
 
															+		uintptr(RelationAll),
														
 
															+		uintptr(unsafe.Pointer(&buf[0])),
														
 
															+		uintptr(unsafe.Pointer(&bufSize)),
														
 
															+	)
														
 
															+	if ret != 0 {
														
 
															+		return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
														
 
															+	}
														
 
															+
														
 
															+	buf = make([]byte, bufSize)
														
 
															+	ret, _, err = GetLogicalProcessorInformationEx.Call(
														
 
															+		uintptr(RelationAll),
														
 
															+		uintptr(unsafe.Pointer(&buf[0])),
														
 
															+		uintptr(unsafe.Pointer(&bufSize)),
														
 
															+	)
														
 
															+	if ret == 0 {
														
 
															+		return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
														
 
															+	}
														
 
															+	return buf, nil
														
 
															+}
														
 
															+
														
 
															+func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
														
 
															+	var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
														
 
															+	// Find all the packages first
														
 
															+	packages := []*winPackage{}
														
 
															+	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
														
 
															+		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
														
 
															+		if slpi.Relationship != RelationProcessorPackage {
														
 
															+			continue
														
 
															+		}
														
 
															+		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
														
 
															+		pkg := &winPackage{}
														
 
															+		ga0 := unsafe.Pointer(&pr.GroupMask[0])
														
 
															+		for j := range pr.GroupCount {
														
 
															+			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
														
 
															+			pkg.groups = append(pkg.groups, gm)
														
 
															+		}
														
 
															+		packages = append(packages, pkg)
														
 
															+	}
														
 
															+
														
 
															+	slog.Info("packages", "count", len(packages))
														
 
															+
														
 
															+	// To identify efficiency cores we have to compare the relative values
														
 
															+	// Larger values are "less efficient" (aka, more performant)
														
 
															+	var maxEfficiencyClass byte
														
 
															+	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
														
 
															+		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
														
 
															+		if slpi.Relationship != RelationProcessorCore {
														
 
															+			continue
														
 
															+		}
														
 
															+		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
														
 
															+		if pr.EfficiencyClass > maxEfficiencyClass {
														
 
															+			maxEfficiencyClass = pr.EfficiencyClass
														
 
															+		}
														
 
															+	}
														
 
															+	if maxEfficiencyClass > 0 {
														
 
															+		slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
														
 
															+	}
														
 
															+
														
 
															+	// then match up the Cores to the Packages, count up cores, threads and efficiency cores
														
 
															+	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
														
 
															+		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
														
 
															+		if slpi.Relationship != RelationProcessorCore {
														
 
															+			continue
														
 
															+		}
														
 
															+		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
														
 
															+		ga0 := unsafe.Pointer(&pr.GroupMask[0])
														
 
															+		for j := range pr.GroupCount {
														
 
															+			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
														
 
															+			for _, pkg := range packages {
														
 
															+				if pkg.IsMember(gm) {
														
 
															+					pkg.coreCount++
														
 
															+					if pr.Flags == 0 {
														
 
															+						pkg.threadCount++
														
 
															+					} else {
														
 
															+						pkg.threadCount += 2
														
 
															+					}
														
 
															+					if pr.EfficiencyClass < maxEfficiencyClass {
														
 
															+						pkg.efficiencyCoreCount++
														
 
															+					}
														
 
															+				}
														
 
															+			}
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	// Sumarize the results
														
 
															+	for i, pkg := range packages {
														
 
															+		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
														
 
															+	}
														
 
															+
														
 
															+	return packages
														
 
															+}
														
 
															+
														
 
															+func GetCPUDetails() ([]CPU, error) {
														
 
															+	buf, err := getLogicalProcessorInformationEx()
														
 
															+	if err != nil {
														
 
															+		return nil, err
														
 
															+	}
														
 
															+	packages := processSystemLogicalProcessorInforationList(buf)
														
 
															+	cpus := make([]CPU, len(packages))
														
 
															+
														
 
															+	for i, pkg := range packages {
														
 
															+		cpus[i].CoreCount = pkg.coreCount
														
 
															+		cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
														
 
															+		cpus[i].ThreadCount = pkg.threadCount
														
 
															+	}
														
 
															+	return cpus, nil
														
 
															+}
														
--- a/gpu/gpu_windows_test.go
+++ b/gpu/gpu_windows_test.go
--- a/gpu/types.go
+++ b/gpu/types.go
@@ -10,11 +10,11 @@ import (
 
															 type memInfo struct {
														
 
															 	TotalMemory uint64 `json:"total_memory,omitempty"`
														
 
															 	FreeMemory  uint64 `json:"free_memory,omitempty"`
														
 
															-	FreeSwap    uint64 `json:"free_swap,omitempty"`
														
 
															+	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
														
 
															 }
														
 
															 // Beginning of an `ollama info` command
														
 
															-type GpuInfo struct {
														
 
															+type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
														
 
															 	memInfo
														
 
															 	Library string `json:"library,omitempty"`
														
@@ -49,6 +49,17 @@ type GpuInfo struct {
 
															 type CPUInfo struct {
														
 
															 	GpuInfo
														
 
															+	CPUs []CPU
														
 
															+}
														
 
															+
														
 
															+// CPU type represents a CPU Package occupying a socket
														
 
															+type CPU struct {
														
 
															+	ID                  string `cpuinfo:"processor"`
														
 
															+	VendorID            string `cpuinfo:"vendor_id"`
														
 
															+	ModelName           string `cpuinfo:"model name"`
														
 
															+	CoreCount           int
														
 
															+	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
														
 
															+	ThreadCount         int
														
 
															 }
														
 
															 type CudaGPUInfo struct {
														
@@ -158,3 +169,12 @@ type SystemInfo struct {
 
															 	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
														
 
															 	DiscoveryErrors []string             `json:"discovery_errors"`
														
 
															 }
														
 
															+
														
 
															+// Return the optimal number of threads to use for inference
														
 
															+func (si SystemInfo) GetOptimalThreadCount() int {
														
 
															+	if len(si.System.CPUs) == 0 {
														
 
															+		return 0
														
 
															+	}
														
 
															+	// Allocate thread count matching the performance cores on a single socket
														
 
															+	return si.System.CPUs[0].CoreCount - si.System.CPUs[0].EfficiencyCoreCount
														
 
															+}
														
--- a/llm/server.go
+++ b/llm/server.go
@@ -98,15 +98,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
															 	var systemFreeMemory uint64
														
 
															 	var systemSwapFreeMemory uint64
														
 
															-	systemMemInfo, err := gpu.GetCPUMem()
														
 
															-	if err != nil {
														
 
															-		slog.Error("failed to lookup system memory", "error", err)
														
 
															-	} else {
														
 
															-		systemTotalMemory = systemMemInfo.TotalMemory
														
 
															-		systemFreeMemory = systemMemInfo.FreeMemory
														
 
															-		systemSwapFreeMemory = systemMemInfo.FreeSwap
														
 
															-		slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
														
 
															-	}
														
 
															+	systemInfo := gpu.GetSystemInfo()
														
 
															+	systemTotalMemory = systemInfo.System.TotalMemory
														
 
															+	systemFreeMemory = systemInfo.System.FreeMemory
														
 
															+	systemSwapFreeMemory = systemInfo.System.FreeSwap
														
 
															+	slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
														
 
															 	// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
														
 
															 	if opts.NumGPU == 0 {
														
@@ -217,8 +213,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
															 		params = append(params, "--mmproj", projectors[0])
														
 
															 	}
														
 
															+	defaultThreads := systemInfo.GetOptimalThreadCount()
														
 
															 	if opts.NumThread > 0 {
														
 
															 		params = append(params, "--threads", strconv.Itoa(opts.NumThread))
														
 
															+	} else if defaultThreads > 0 {
														
 
															+		params = append(params, "--threads", strconv.Itoa(defaultThreads))
														
 
															 	}
														
 
															 	if !opts.F16KV {
														
@@ -260,15 +259,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
															 		params = append(params, "--mlock")
														
 
															 	}
														
 
															-	if gpu.IsNUMA() && gpus[0].Library == "cpu" {
														
 
															-		numaMode := "distribute"
														
 
															-		if runtime.GOOS == "linux" {
														
 
															-			if _, err := exec.LookPath("numactl"); err == nil {
														
 
															-				numaMode = "numactl"
														
 
															-			}
														
 
															-		}
														
 
															-		params = append(params, "--numa", numaMode)
														
 
															-	}
														
 
															+	// TODO - NUMA support currently doesn't work properly
														
 
															 	params = append(params, "--parallel", strconv.Itoa(numParallel))