types.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. package gpu
  2. import (
  3. "fmt"
  4. "log/slog"
  5. "github.com/ollama/ollama/format"
  6. )
// memInfo holds a total and a free memory figure as unsigned 64-bit counts.
// Both fields are omitted from JSON output when zero.
// NOTE(review): values are presumably byte counts (LogDetails renders them
// with format.HumanBytes2) — confirm against the code that fills them in.
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
}
// GpuInfo describes a single compute device: its memory figures (through the
// embedded memInfo), the library and optional variant associated with it, and
// its identification and driver-version fields.
//
// Beginning of an `ollama info` command
type GpuInfo struct {
	memInfo
	Library string `json:"library,omitempty"`

	// Optional variant to select (e.g. versions, cpu feature flags)
	Variant string `json:"variant,omitempty"`

	// MinimumMemory represents the minimum memory required to use the GPU
	// (excluded from JSON output)
	MinimumMemory uint64 `json:"-"`

	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath string `json:"lib_path,omitempty"`

	// GPU information
	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
	Name    string `json:"name"`    // user friendly name if available
	Compute string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`

	// TODO other performance capability info to help in scheduling decisions
}
// CPUInfo embeds GpuInfo and adds no fields of its own.
// NOTE(review): presumably used to describe the CPU as a compute device —
// confirm against callers.
type CPUInfo struct {
	GpuInfo
}
// CudaGPUInfo embeds GpuInfo and adds an unexported index field.
// NOTE(review): index is presumably the CUDA device index — confirm.
type CudaGPUInfo struct {
	GpuInfo
	index int // nolint: unused
}
// CudaGPUInfoList is a slice of CudaGPUInfo values.
type CudaGPUInfoList []CudaGPUInfo
// RocmGPUInfo embeds GpuInfo and adds two unexported fields, usedFilepath and
// index.
// NOTE(review): usedFilepath presumably names a file from which used-memory
// data is read, and index the device index — confirm against the ROCm code.
type RocmGPUInfo struct {
	GpuInfo
	usedFilepath string // nolint: unused
	index        int    // nolint: unused
}
// RocmGPUInfoList is a slice of RocmGPUInfo values.
type RocmGPUInfoList []RocmGPUInfo
// OneapiGPUInfo embeds GpuInfo and adds the unexported device index.
type OneapiGPUInfo struct {
	GpuInfo
	index int // device index
}
// OneapiGPUInfoList is a slice of OneapiGPUInfo values.
type OneapiGPUInfoList []OneapiGPUInfo
// GpuInfoList is a slice of GpuInfo values; the ByLibrary and LogDetails
// methods are defined on it.
type GpuInfoList []GpuInfo
  50. // Split up the set of gpu info's by Library and variant
  51. func (l GpuInfoList) ByLibrary() []GpuInfoList {
  52. resp := []GpuInfoList{}
  53. libs := []string{}
  54. for _, info := range l {
  55. found := false
  56. requested := info.Library
  57. if info.Variant != "" {
  58. requested += "_" + info.Variant
  59. }
  60. for i, lib := range libs {
  61. if lib == requested {
  62. resp[i] = append(resp[i], info)
  63. found = true
  64. break
  65. }
  66. }
  67. if !found {
  68. libs = append(libs, info.Library)
  69. resp = append(resp, []GpuInfo{info})
  70. }
  71. }
  72. return resp
  73. }
  74. // Report the GPU information into the log an Info level
  75. func (l GpuInfoList) LogDetails() {
  76. for _, g := range l {
  77. slog.Info("inference compute",
  78. "id", g.ID,
  79. "library", g.Library,
  80. "compute", g.Compute,
  81. "driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
  82. "name", g.Name,
  83. "total", format.HumanBytes2(g.TotalMemory),
  84. "available", format.HumanBytes2(g.FreeMemory),
  85. )
  86. }
  87. }
  88. // Sort by Free Space
  89. type ByFreeMemory []GpuInfo
  90. func (a ByFreeMemory) Len() int { return len(a) }
  91. func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  92. func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
// CPUCapability identifies a CPU vector-extension level. Its values are the
// CPUCapability* constants, and ToString and ToVariant are defined on it.
type CPUCapability uint32
// GPURunnerCPUCapability defaults to CPUCapabilityAVX.
// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX
// CPU capability levels, numbered from 0 by iota in declaration order.
const (
	// CPUCapabilityBase is the zero value; ToString reports it as
	// "no vector extensions" and ToVariant as the empty string.
	CPUCapabilityBase CPUCapability = iota
	// CPUCapabilityAVX is reported as "AVX" by ToString and "avx" by ToVariant.
	CPUCapabilityAVX
	// CPUCapabilityAVX2 is reported as "AVX2" by ToString and "avx2" by ToVariant.
	CPUCapabilityAVX2
	// TODO AVX512
)
  102. func (c CPUCapability) ToString() string {
  103. switch c {
  104. case CPUCapabilityAVX:
  105. return "AVX"
  106. case CPUCapabilityAVX2:
  107. return "AVX2"
  108. default:
  109. return "no vector extensions"
  110. }
  111. }
  112. func (c CPUCapability) ToVariant() string {
  113. switch c {
  114. case CPUCapabilityAVX:
  115. return "avx"
  116. case CPUCapabilityAVX2:
  117. return "avx2"
  118. default:
  119. return ""
  120. }
  121. }