// Source file: types.go
package discover

import (
	"fmt"
	"log/slog"

	"github.com/ollama/ollama/format"
)
  7. type memInfo struct {
  8. TotalMemory uint64 `json:"total_memory,omitempty"`
  9. FreeMemory uint64 `json:"free_memory,omitempty"`
  10. FreeSwap uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
  11. }
  12. // Beginning of an `ollama info` command
  13. type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
  14. memInfo
  15. Library string `json:"library,omitempty"`
  16. // Optional variant to select (e.g. versions, cpu feature flags)
  17. Variant string `json:"variant"`
  18. // MinimumMemory represents the minimum memory required to use the GPU
  19. MinimumMemory uint64 `json:"-"`
  20. // Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
  21. DependencyPath string `json:"lib_path,omitempty"`
  22. // Extra environment variables specific to the GPU as list of [key,value]
  23. EnvWorkarounds [][2]string `json:"envs,omitempty"`
  24. // Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
  25. // the FreeMemory is best effort, and may over or under report actual memory usage
  26. // False indicates FreeMemory can generally be trusted on this GPU
  27. UnreliableFreeMemory bool
  28. // GPU information
  29. ID string `json:"gpu_id"` // string to use for selection of this specific GPU
  30. Name string `json:"name"` // user friendly name if available
  31. Compute string `json:"compute"` // Compute Capability or gfx
  32. // Driver Information - TODO no need to put this on each GPU
  33. DriverMajor int `json:"driver_major,omitempty"`
  34. DriverMinor int `json:"driver_minor,omitempty"`
  35. // TODO other performance capability info to help in scheduling decisions
  36. }
  37. type CPUInfo struct {
  38. GpuInfo
  39. CPUs []CPU
  40. }
  41. // CPU type represents a CPU Package occupying a socket
  42. type CPU struct {
  43. ID string `cpuinfo:"processor"`
  44. VendorID string `cpuinfo:"vendor_id"`
  45. ModelName string `cpuinfo:"model name"`
  46. CoreCount int
  47. EfficiencyCoreCount int // Performance = CoreCount - Efficiency
  48. ThreadCount int
  49. }
  50. type CudaGPUInfo struct {
  51. GpuInfo
  52. OSOverhead uint64 // Memory overhead between the driver library and management library
  53. index int //nolint:unused,nolintlint
  54. computeMajor int //nolint:unused,nolintlint
  55. computeMinor int //nolint:unused,nolintlint
  56. }
  57. type CudaGPUInfoList []CudaGPUInfo
  58. type RocmGPUInfo struct {
  59. GpuInfo
  60. usedFilepath string //nolint:unused,nolintlint
  61. index int //nolint:unused,nolintlint
  62. }
  63. type RocmGPUInfoList []RocmGPUInfo
  64. type OneapiGPUInfo struct {
  65. GpuInfo
  66. driverIndex int //nolint:unused,nolintlint
  67. gpuIndex int //nolint:unused,nolintlint
  68. }
  69. type OneapiGPUInfoList []OneapiGPUInfo
  70. type GpuInfoList []GpuInfo
  71. type UnsupportedGPUInfo struct {
  72. GpuInfo
  73. Reason string `json:"reason"`
  74. }
  75. // Split up the set of gpu info's by Library and variant
  76. func (l GpuInfoList) ByLibrary() []GpuInfoList {
  77. resp := []GpuInfoList{}
  78. libs := []string{}
  79. for _, info := range l {
  80. found := false
  81. requested := info.Library
  82. if info.Variant != CPUCapabilityNone.String() {
  83. requested += "_" + info.Variant
  84. }
  85. for i, lib := range libs {
  86. if lib == requested {
  87. resp[i] = append(resp[i], info)
  88. found = true
  89. break
  90. }
  91. }
  92. if !found {
  93. libs = append(libs, requested)
  94. resp = append(resp, []GpuInfo{info})
  95. }
  96. }
  97. return resp
  98. }
  99. // Report the GPU information into the log an Info level
  100. func (l GpuInfoList) LogDetails() {
  101. for _, g := range l {
  102. slog.Info("inference compute",
  103. "id", g.ID,
  104. "library", g.Library,
  105. "variant", g.Variant,
  106. "compute", g.Compute,
  107. "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
  108. "name", g.Name,
  109. "total", format.HumanBytes2(g.TotalMemory),
  110. "available", format.HumanBytes2(g.FreeMemory),
  111. )
  112. }
  113. }
  114. // Sort by Free Space
  115. type ByFreeMemory []GpuInfo
  116. func (a ByFreeMemory) Len() int { return len(a) }
  117. func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  118. func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
  119. type CPUCapability uint32
  120. // Override at build time when building base GPU runners
  121. var GPURunnerCPUCapability = CPUCapabilityAVX
  122. const (
  123. CPUCapabilityNone CPUCapability = iota
  124. CPUCapabilityAVX
  125. CPUCapabilityAVX2
  126. // TODO AVX512
  127. )
  128. func (c CPUCapability) String() string {
  129. switch c {
  130. case CPUCapabilityAVX:
  131. return "avx"
  132. case CPUCapabilityAVX2:
  133. return "avx2"
  134. default:
  135. return "no vector extensions"
  136. }
  137. }
  138. type SystemInfo struct {
  139. System CPUInfo `json:"system"`
  140. GPUs []GpuInfo `json:"gpus"`
  141. UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
  142. DiscoveryErrors []string `json:"discovery_errors"`
  143. }
  144. // Return the optimal number of threads to use for inference
  145. func (si SystemInfo) GetOptimalThreadCount() int {
  146. if len(si.System.CPUs) == 0 {
  147. return 0
  148. }
  149. coreCount := 0
  150. for _, c := range si.System.CPUs {
  151. coreCount += c.CoreCount - c.EfficiencyCoreCount
  152. }
  153. return coreCount
  154. }