123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- package discover
- import (
- "fmt"
- "log/slog"
- "github.com/ollama/ollama/format"
- )
// memInfo holds the memory capacity figures, in bytes, shared by both
// system-wide and per-GPU discovery results (see LogDetails, which renders
// them with format.HumanBytes2).
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}
// GpuInfo describes a single discovered inference device (GPU or the CPU
// fallback) along with the runner library needed to drive it.
// Beginning of an `ollama info` command
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
	memInfo
	// Library is the runner backend for this device (e.g. "cuda", "rocm",
	// "metal" — see FlashAttentionSupported for the values compared against).
	Library string `json:"library,omitempty"`

	// Optional variant to select (e.g. versions, cpu feature flags)
	Variant string `json:"variant"`

	// MinimumMemory represents the minimum memory required to use the GPU
	MinimumMemory uint64 `json:"-"`

	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath []string `json:"lib_path,omitempty"`

	// Extra environment variables specific to the GPU as list of [key,value]
	EnvWorkarounds [][2]string `json:"envs,omitempty"`

	// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
	// the FreeMemory is best effort, and may over or under report actual memory usage
	// False indicates FreeMemory can generally be trusted on this GPU
	UnreliableFreeMemory bool

	// GPU information
	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
	Name    string `json:"name"`    // user friendly name if available
	Compute string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`

	// TODO other performance capability info to help in scheduling decisions
}
- func (gpu GpuInfo) RunnerName() string {
- if gpu.Variant != "" {
- return gpu.Library + "_" + gpu.Variant
- }
- return gpu.Library
- }
// CPUInfo describes the host CPU as an inference device. It embeds GpuInfo
// so the CPU shares the common device fields, and adds per-socket details.
type CPUInfo struct {
	GpuInfo
	// CPUs lists one entry per physical CPU package (socket).
	CPUs []CPU
}
// CPU type represents a CPU Package occupying a socket
// NOTE(review): the `cpuinfo` struct tags appear to map fields to
// /proc/cpuinfo field names — confirm against the discovery code that
// populates this struct.
type CPU struct {
	ID                  string `cpuinfo:"processor"`
	VendorID            string `cpuinfo:"vendor_id"`
	ModelName           string `cpuinfo:"model name"`
	CoreCount           int
	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
	ThreadCount         int
}
// CudaGPUInfo augments GpuInfo with CUDA-specific discovery state.
type CudaGPUInfo struct {
	GpuInfo
	OSOverhead uint64 // Memory overhead between the driver library and management library

	// Unexported fields are internal discovery bookkeeping.
	index        int //nolint:unused,nolintlint
	computeMajor int //nolint:unused,nolintlint
	computeMinor int //nolint:unused,nolintlint
}

// CudaGPUInfoList is a collection of CUDA devices.
type CudaGPUInfoList []CudaGPUInfo
// RocmGPUInfo augments GpuInfo with ROCm-specific discovery state.
type RocmGPUInfo struct {
	GpuInfo
	usedFilepath string //nolint:unused,nolintlint
	index        int    //nolint:unused,nolintlint
}

// RocmGPUInfoList is a collection of ROCm devices.
type RocmGPUInfoList []RocmGPUInfo
// OneapiGPUInfo augments GpuInfo with Intel oneAPI-specific discovery state.
type OneapiGPUInfo struct {
	GpuInfo
	driverIndex int //nolint:unused,nolintlint
	gpuIndex    int //nolint:unused,nolintlint
}

// OneapiGPUInfoList is a collection of oneAPI devices.
type OneapiGPUInfoList []OneapiGPUInfo
- type GpuInfoList []GpuInfo
// UnsupportedGPUInfo describes a device that was discovered but cannot be
// used for inference, along with the reason it was rejected.
type UnsupportedGPUInfo struct {
	GpuInfo
	Reason string `json:"reason"`
}
- // Split up the set of gpu info's by Library and variant
- func (l GpuInfoList) ByLibrary() []GpuInfoList {
- resp := []GpuInfoList{}
- libs := []string{}
- for _, info := range l {
- found := false
- requested := info.Library
- if info.Variant != "" {
- requested += "_" + info.Variant
- }
- for i, lib := range libs {
- if lib == requested {
- resp[i] = append(resp[i], info)
- found = true
- break
- }
- }
- if !found {
- libs = append(libs, requested)
- resp = append(resp, []GpuInfo{info})
- }
- }
- return resp
- }
- // Report the GPU information into the log an Info level
- func (l GpuInfoList) LogDetails() {
- for _, g := range l {
- slog.Info("inference compute",
- "id", g.ID,
- "library", g.Library,
- "variant", g.Variant,
- "compute", g.Compute,
- "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
- "name", g.Name,
- "total", format.HumanBytes2(g.TotalMemory),
- "available", format.HumanBytes2(g.FreeMemory),
- )
- }
- }
// ByFreeMemory implements sort.Interface to order GPUs by ascending
// free memory.
// Sort by Free Space
type ByFreeMemory []GpuInfo

func (a ByFreeMemory) Len() int           { return len(a) }
func (a ByFreeMemory) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
// SystemInfo is the aggregate result of device discovery: the host CPU,
// all usable GPUs, any rejected GPUs, and errors hit along the way.
type SystemInfo struct {
	System          CPUInfo              `json:"system"`
	GPUs            []GpuInfo            `json:"gpus"`
	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
	DiscoveryErrors []string             `json:"discovery_errors"`
}
- // Return the optimal number of threads to use for inference
- func (si SystemInfo) GetOptimalThreadCount() int {
- if len(si.System.CPUs) == 0 {
- return 0
- }
- coreCount := 0
- for _, c := range si.System.CPUs {
- coreCount += c.CoreCount - c.EfficiencyCoreCount
- }
- return coreCount
- }
- // For each GPU, check if it does NOT support flash attention
- func (l GpuInfoList) FlashAttentionSupported() bool {
- for _, gpu := range l {
- supportsFA := gpu.Library == "metal" ||
- (gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
- gpu.Library == "rocm"
- if !supportsFA {
- return false
- }
- }
- return true
- }
|