types.go 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. package discover
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "github.com/ollama/ollama/format"
  6. "github.com/ollama/ollama/runners"
  7. )
  // memInfo groups the memory counters that GpuInfo embeds. Each field is
  // dropped from JSON output when it is zero (omitempty).
  // NOTE(review): values are presumably byte counts, since LogDetails renders
  // them with format.HumanBytes2 - confirm against the code that fills them in.
  type memInfo struct {
  	TotalMemory uint64 `json:"total_memory,omitempty"`
  	FreeMemory  uint64 `json:"free_memory,omitempty"`
  	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
  }
  // GpuInfo describes a single inference device: its memory counters (through
  // the embedded memInfo), the backend library used to drive it, and
  // identifying details such as ID, name and driver version.
  //
  // Beginning of an `ollama info` command
  type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
  	memInfo
  	// Library names the backend for this device (e.g. "cuda", "rocm", "metal")
  	Library string `json:"library,omitempty"`
  	// Optional variant to select (e.g. versions, cpu feature flags)
  	Variant string `json:"variant"`
  	// MinimumMemory represents the minimum memory required to use the GPU.
  	// It is excluded from JSON output.
  	MinimumMemory uint64 `json:"-"`
  	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
  	DependencyPath []string `json:"lib_path,omitempty"`
  	// Extra environment variables specific to the GPU as list of [key,value]
  	EnvWorkarounds [][2]string `json:"envs,omitempty"`
  	// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
  	// the FreeMemory is best effort, and may over or under report actual memory usage.
  	// False indicates FreeMemory can generally be trusted on this GPU.
  	UnreliableFreeMemory bool
  	// GPU information
  	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
  	Name    string `json:"name"`    // user friendly name if available
  	Compute string `json:"compute"` // Compute Capability or gfx
  	// Driver Information - TODO no need to put this on each GPU
  	DriverMajor int `json:"driver_major,omitempty"`
  	DriverMinor int `json:"driver_minor,omitempty"`
  	// TODO other performance capability info to help in scheduling decisions
  }
  38. func (gpu GpuInfo) RunnerName() string {
  39. if gpu.Variant != "" {
  40. return gpu.Library + "_" + gpu.Variant
  41. }
  42. return gpu.Library
  43. }
  // CPUInfo describes the host CPU as an inference device: the common GpuInfo
  // fields plus the list of CPU entries found on the system.
  type CPUInfo struct {
  	GpuInfo
  	CPUs []CPU
  }
  // CPU type represents a CPU Package occupying a socket.
  // NOTE(review): the `cpuinfo` struct tags presumably name the matching keys
  // in a cpuinfo-style listing - confirm against the code that reads them.
  type CPU struct {
  	ID                  string `cpuinfo:"processor"`
  	VendorID            string `cpuinfo:"vendor_id"`
  	ModelName           string `cpuinfo:"model name"`
  	CoreCount           int
  	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
  	ThreadCount         int
  }
  // CudaGPUInfo extends GpuInfo with CUDA-specific details. The unexported
  // fields carry nolint markers because they may be unused in some builds.
  type CudaGPUInfo struct {
  	GpuInfo
  	OSOverhead   uint64 // Memory overhead between the driver library and management library
  	index        int    //nolint:unused,nolintlint
  	computeMajor int    //nolint:unused,nolintlint
  	computeMinor int    //nolint:unused,nolintlint
  }
  // CudaGPUInfoList is a slice of CudaGPUInfo entries.
  type CudaGPUInfoList []CudaGPUInfo
  // RocmGPUInfo extends GpuInfo with ROCm-specific details. The unexported
  // fields carry nolint markers because they may be unused in some builds.
  type RocmGPUInfo struct {
  	GpuInfo
  	usedFilepath string //nolint:unused,nolintlint
  	index        int    //nolint:unused,nolintlint
  }
  // RocmGPUInfoList is a slice of RocmGPUInfo entries.
  type RocmGPUInfoList []RocmGPUInfo
  // OneapiGPUInfo extends GpuInfo with oneAPI-specific details: a driver index
  // and a GPU index. The unexported fields carry nolint markers because they
  // may be unused in some builds.
  type OneapiGPUInfo struct {
  	GpuInfo
  	driverIndex int //nolint:unused,nolintlint
  	gpuIndex    int //nolint:unused,nolintlint
  }
  // OneapiGPUInfoList is a slice of OneapiGPUInfo entries.
  type OneapiGPUInfoList []OneapiGPUInfo
  // GpuInfoList is a slice of GpuInfo entries; the grouping, logging and
  // capability helpers in this file are defined as methods on it.
  type GpuInfoList []GpuInfo
  // UnsupportedGPUInfo pairs a GpuInfo with a textual Reason, serialized under
  // the "reason" JSON key.
  type UnsupportedGPUInfo struct {
  	GpuInfo
  	Reason string `json:"reason"`
  }
  82. // Split up the set of gpu info's by Library and variant
  83. func (l GpuInfoList) ByLibrary() []GpuInfoList {
  84. resp := []GpuInfoList{}
  85. libs := []string{}
  86. for _, info := range l {
  87. found := false
  88. requested := info.Library
  89. if info.Variant != runners.CPUCapabilityNone.String() {
  90. requested += "_" + info.Variant
  91. }
  92. for i, lib := range libs {
  93. if lib == requested {
  94. resp[i] = append(resp[i], info)
  95. found = true
  96. break
  97. }
  98. }
  99. if !found {
  100. libs = append(libs, requested)
  101. resp = append(resp, []GpuInfo{info})
  102. }
  103. }
  104. return resp
  105. }
  106. // Report the GPU information into the log an Info level
  107. func (l GpuInfoList) LogDetails() {
  108. for _, g := range l {
  109. slog.Info("inference compute",
  110. "id", g.ID,
  111. "library", g.Library,
  112. "variant", g.Variant,
  113. "compute", g.Compute,
  114. "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
  115. "name", g.Name,
  116. "total", format.HumanBytes2(g.TotalMemory),
  117. "available", format.HumanBytes2(g.FreeMemory),
  118. )
  119. }
  120. }
  121. // Sort by Free Space
  122. type ByFreeMemory []GpuInfo
  123. func (a ByFreeMemory) Len() int { return len(a) }
  124. func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  125. func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
  // SystemInfo is the JSON-serializable summary of a discovery pass: the host
  // CPU description, the GPUs, the GPUs reported as unsupported (each with a
  // reason), and any discovery error messages.
  type SystemInfo struct {
  	System          CPUInfo              `json:"system"`
  	GPUs            []GpuInfo            `json:"gpus"`
  	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
  	DiscoveryErrors []string             `json:"discovery_errors"`
  }
  132. // Return the optimal number of threads to use for inference
  133. func (si SystemInfo) GetOptimalThreadCount() int {
  134. if len(si.System.CPUs) == 0 {
  135. return 0
  136. }
  137. coreCount := 0
  138. for _, c := range si.System.CPUs {
  139. coreCount += c.CoreCount - c.EfficiencyCoreCount
  140. }
  141. return coreCount
  142. }
  143. // For each GPU, check if it does NOT support flash attention
  144. func (l GpuInfoList) FlashAttentionSupported() bool {
  145. for _, gpu := range l {
  146. supportsFA := gpu.Library == "metal" ||
  147. (gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
  148. gpu.Library == "rocm"
  149. if !supportsFA {
  150. return false
  151. }
  152. }
  153. return true
  154. }