@@ -17,6 +17,7 @@ import (
 	"os/exec"
 	"path"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"strconv"
 	"strings"
@@ -36,36 +37,101 @@ func osPath(llamaPath string) string {
 	return llamaPath
 }
 
-func chooseRunner(gpuPath, cpuPath string) string {
-	tmpDir, err := os.MkdirTemp("", "llama-*")
+func cudaVersion() (int, error) {
+	// first try nvcc; it gives the most accurate version if available
+	cmd := exec.Command("nvcc", "--version")
+	output, err := cmd.CombinedOutput()
+	if err == nil {
+		// regex to match the CUDA version line in nvcc --version output
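+		// e.g. "Cuda compilation tools, release 12.2, V12.2.140"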
+		re := regexp.MustCompile(`release (\d+\.\d+),`)
+		matches := re.FindStringSubmatch(string(output))
+		if len(matches) >= 2 {
+			cudaVersion := matches[1]
+			cudaVersionParts := strings.Split(cudaVersion, ".")
+			cudaMajorVersion, err := strconv.Atoi(cudaVersionParts[0])
+			if err == nil {
+				return cudaMajorVersion, nil
+			}
+		}
+	}
+
+	// fall back to nvidia-smi
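+	// nvidia-smi reports the driver's CUDA version in its header, e.g. "CUDA Version: 12.2"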
+	cmd = exec.Command("nvidia-smi")
+	output, err = cmd.CombinedOutput()
 	if err != nil {
-		log.Fatalf("llama.cpp: failed to create temp dir: %v", err)
+		return -1, err
 	}
 
-	llamaPath := osPath(gpuPath)
-	if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
-		llamaPath = osPath(cpuPath)
-		if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
-			log.Fatalf("llama.cpp executable not found")
-		}
+	re := regexp.MustCompile(`CUDA Version: (\d+\.\d+)`)
+	matches := re.FindStringSubmatch(string(output))
+	if len(matches) < 2 {
+		return -1, errors.New("could not find CUDA version")
+	}
+
+	cudaVersion := matches[1]
+	cudaVersionParts := strings.Split(cudaVersion, ".")
+	cudaMajorVersion, err := strconv.Atoi(cudaVersionParts[0])
+	if err != nil {
+		return -1, err
+	}
+	return cudaMajorVersion, nil
+}
+
+func chooseRunner(runnerType string) string {
+	tmpDir, err := os.MkdirTemp("", "llama-*")
+	if err != nil {
+		log.Fatalf("llama.cpp: failed to create temp dir: %v", err)
 	}
 
+	cpuPath := osPath(path.Join("llama.cpp", runnerType, "build", "cpu", "bin"))
+	llamaPath := cpuPath
 	files := []string{"server"}
+
+	// set OS-specific llama.cpp runner paths
 	switch runtime.GOOS {
-	case "windows":
-		files = []string{"server.exe"}
 	case "darwin":
-		if llamaPath == osPath(gpuPath) {
-			files = append(files, "ggml-metal.metal")
-		}
+		// TODO: change to check metal version
+		llamaPath = osPath(path.Join("llama.cpp", runnerType, "build", "gpu", "bin"))
+		files = append(files, "ggml-metal.metal")
 	case "linux":
-		// check if there is a GPU available
-		if _, err := CheckVRAM(); errors.Is(err, errNoGPU) {
-			// this error was logged on start-up, so we don't need to log it again
-			llamaPath = osPath(cpuPath)
+		cudaVersion, err := cudaVersion()
+		if err != nil {
+			// fall back to the CPU runner in the CUDA version check below
+			log.Printf("failed to get CUDA version: %v", err)
+		}
+
+		switch cudaVersion {
+		case 11, 12:
+			cudaDir := fmt.Sprintf("cuda-%d", cudaVersion)
+			llamaPath = osPath(path.Join("llama.cpp", runnerType, "build", cudaDir, "bin"))
+		default:
+			if cudaVersion != -1 {
+				// a valid version was returned but it is not supported
+				log.Printf("CUDA version %d not supported, falling back to CPU", cudaVersion)
+			}
+			llamaPath = cpuPath
+		}
+	case "windows":
+		// TODO: select windows GPU runner here when available
+		files = []string{"server.exe"}
+	default:
+		log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
+	}
+
+	// check that the chosen runner exists; if not, fall back to the CPU runner
+	if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
+		// fall back to the CPU runner
+		llamaPath = cpuPath
+		files = []string{"server"}
+		if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
+			log.Fatalf("llama.cpp executable not found")
 		}
+		log.Printf("llama.cpp %s executable not found, falling back to cpu", runnerType)
 	}
 
+	// copy the files locally to run the llama.cpp server
 	for _, f := range files {
 		srcPath := path.Join(llamaPath, f)
 		destPath := filepath.Join(tmpDir, f)