|
@@ -13,7 +13,10 @@ import "C"
|
|
|
import (
|
|
|
"fmt"
|
|
|
"log"
|
|
|
+ "os"
|
|
|
+ "path/filepath"
|
|
|
"runtime"
|
|
|
+ "strings"
|
|
|
"sync"
|
|
|
"unsafe"
|
|
|
)
|
|
@@ -29,31 +32,79 @@ var gpuHandles *handles = nil
|
|
|
// CudaComputeMajorMin is the minimum CUDA compute capability major version.
// With our current CUDA compile flags, 5.2 and older will not work properly.
const CudaComputeMajorMin = 6

// Glob patterns describing where each vendor's GPU management library may be
// installed, split by vendor and operating system.
var (
	// CudaLinuxGlobs lists possible locations for the nvidia-ml library on Linux.
	CudaLinuxGlobs = []string{
		"/usr/local/cuda/lib64/libnvidia-ml.so*",
		"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
		"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so*",
		"/usr/lib/wsl/lib/libnvidia-ml.so*",
		"/opt/cuda/lib64/libnvidia-ml.so*",
		"/usr/lib*/libnvidia-ml.so*",
		"/usr/local/lib*/libnvidia-ml.so*",
		"/usr/lib/aarch64-linux-gnu/nvidia/current/libnvidia-ml.so*",
		"/usr/lib/aarch64-linux-gnu/libnvidia-ml.so*",
	}

	// CudaWindowsGlobs lists possible locations for nvml.dll on Windows.
	CudaWindowsGlobs = []string{
		"c:\\Windows\\System32\\nvml.dll",
	}

	// RocmLinuxGlobs lists possible locations for the rocm_smi64 library on Linux.
	RocmLinuxGlobs = []string{
		"/opt/rocm*/lib*/librocm_smi64.so*",
	}

	// RocmWindowsGlobs lists possible locations for rocm_smi64.dll on Windows.
	RocmWindowsGlobs = []string{
		"c:\\Windows\\System32\\rocm_smi64.dll",
	}
)
|
|
|
+
|
|
|
// Note: gpuMutex must already be held
|
|
|
func initGPUHandles() {
|
|
|
+
|
|
|
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
|
|
|
+
|
|
|
+ var cudaMgmtName string
|
|
|
+ var cudaMgmtPatterns []string
|
|
|
+ var rocmMgmtName string
|
|
|
+ var rocmMgmtPatterns []string
|
|
|
+ switch runtime.GOOS {
|
|
|
+ case "windows":
|
|
|
+ cudaMgmtName = "nvml.dll"
|
|
|
+ cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs))
|
|
|
+ copy(cudaMgmtPatterns, CudaWindowsGlobs)
|
|
|
+ rocmMgmtName = "rocm_smi64.dll"
|
|
|
+ rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs))
|
|
|
+ copy(rocmMgmtPatterns, RocmWindowsGlobs)
|
|
|
+ case "linux":
|
|
|
+ cudaMgmtName = "libnvidia-ml.so"
|
|
|
+ cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs))
|
|
|
+ copy(cudaMgmtPatterns, CudaLinuxGlobs)
|
|
|
+ rocmMgmtName = "librocm_smi64.so"
|
|
|
+ rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs))
|
|
|
+ copy(rocmMgmtPatterns, RocmLinuxGlobs)
|
|
|
+ default:
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
log.Printf("Detecting GPU type")
|
|
|
gpuHandles = &handles{nil, nil}
|
|
|
- var resp C.cuda_init_resp_t
|
|
|
- C.cuda_init(&resp)
|
|
|
- if resp.err != nil {
|
|
|
- log.Printf("CUDA not detected: %s", C.GoString(resp.err))
|
|
|
- C.free(unsafe.Pointer(resp.err))
|
|
|
+ cudaLibPaths := FindGPULibs(cudaMgmtName, cudaMgmtPatterns)
|
|
|
+ if len(cudaLibPaths) > 0 {
|
|
|
+ cuda := LoadCUDAMgmt(cudaLibPaths)
|
|
|
+ if cuda != nil {
|
|
|
+ log.Printf("Nvidia GPU detected")
|
|
|
+ gpuHandles.cuda = cuda
|
|
|
+ return
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- var resp C.rocm_init_resp_t
|
|
|
- C.rocm_init(&resp)
|
|
|
- if resp.err != nil {
|
|
|
- log.Printf("ROCm not detected: %s", C.GoString(resp.err))
|
|
|
- C.free(unsafe.Pointer(resp.err))
|
|
|
- } else {
|
|
|
+ rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns)
|
|
|
+ if len(rocmLibPaths) > 0 {
|
|
|
+ rocm := LoadROCMMgmt(rocmLibPaths)
|
|
|
+ if rocm != nil {
|
|
|
log.Printf("Radeon GPU detected")
|
|
|
- rocm := resp.rh
|
|
|
- gpuHandles.rocm = &rocm
|
|
|
+ gpuHandles.rocm = rocm
|
|
|
+ return
|
|
|
}
|
|
|
- } else {
|
|
|
- log.Printf("Nvidia GPU detected")
|
|
|
- cuda := resp.ch
|
|
|
- gpuHandles.cuda = &cuda
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -143,3 +194,88 @@ func CheckVRAM() (int64, error) {
|
|
|
|
|
|
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
|
|
|
}
|
|
|
+
|
|
|
// FindGPULibs returns the de-duplicated, symlink-resolved paths of every file
// that could be the GPU management library named baseLibName.
//
// Directories listed in PATH (windows) or LD_LIBRARY_PATH (linux) are searched
// first for files whose name starts with baseLibName; the caller-supplied glob
// patterns are searched afterwards. The patterns slice is only read, never
// modified. Results are returned in discovery order so callers can try each
// candidate in turn. On any other operating system an empty slice is returned.
func FindGPULibs(baseLibName string, patterns []string) []string {
	// Multiple GPU libraries may exist, and some may not work, so keep trying until we exhaust them
	gpuLibPaths := []string{}
	log.Printf("Searching for GPU management library %s", baseLibName)

	var ldPaths []string
	switch runtime.GOOS {
	case "windows":
		ldPaths = strings.Split(os.Getenv("PATH"), ";")
	case "linux":
		ldPaths = strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
	default:
		return gpuLibPaths
	}

	// Start with whatever we find in the PATH/LD_LIBRARY_PATH, then fall back
	// to the supplied patterns. A fresh slice is built so that appending can
	// never write into the caller's backing array.
	searchPatterns := make([]string, 0, len(ldPaths)+len(patterns))
	for _, ldPath := range ldPaths {
		if ldPath == "" {
			// An unset or empty variable yields "", which filepath.Abs would
			// turn into the current working directory; don't search there.
			continue
		}
		d, err := filepath.Abs(ldPath)
		if err != nil {
			continue
		}
		searchPatterns = append(searchPatterns, filepath.Join(d, baseLibName+"*"))
	}
	searchPatterns = append(searchPatterns, patterns...)

	seen := make(map[string]struct{})
	for _, pattern := range searchPatterns {
		// Glob only fails on a malformed pattern; treat that as "no matches".
		matches, _ := filepath.Glob(pattern)
		for _, match := range matches {
			// Resolve any links so we don't try the same lib multiple times
			// and weed out any dups across globs. Dangling links and symlink
			// cycles cannot be loaded, so they are skipped.
			libPath, err := filepath.EvalSymlinks(match)
			if err != nil {
				continue
			}
			if _, dup := seen[libPath]; dup {
				continue
			}
			seen[libPath] = struct{}{}
			gpuLibPaths = append(gpuLibPaths, libPath)
		}
	}
	log.Printf("Discovered GPU libraries: %v", gpuLibPaths)
	return gpuLibPaths
}
|
|
|
+
|
|
|
+func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
|
|
|
+ var resp C.cuda_init_resp_t
|
|
|
+ for _, libPath := range cudaLibPaths {
|
|
|
+ lib := C.CString(libPath)
|
|
|
+ defer C.free(unsafe.Pointer(lib))
|
|
|
+ C.cuda_init(lib, &resp)
|
|
|
+ if resp.err != nil {
|
|
|
+ log.Printf("Unable to load CUDA management library %s: %s", libPath, C.GoString(resp.err))
|
|
|
+ C.free(unsafe.Pointer(resp.err))
|
|
|
+ } else {
|
|
|
+ return &resp.ch
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
|
|
|
+ var resp C.rocm_init_resp_t
|
|
|
+ for _, libPath := range rocmLibPaths {
|
|
|
+ lib := C.CString(libPath)
|
|
|
+ defer C.free(unsafe.Pointer(lib))
|
|
|
+ C.rocm_init(lib, &resp)
|
|
|
+ if resp.err != nil {
|
|
|
+ log.Printf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err))
|
|
|
+ C.free(unsafe.Pointer(resp.err))
|
|
|
+ } else {
|
|
|
+ return &resp.rh
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|