
Use our libraries first

Trying to live off the land for CUDA libraries was not the right strategy. We need to use the version we compiled against to ensure things work properly.
Daniel Hiltgen 1 year ago
parent
commit
380378cc80
2 changed files with 31 additions and 15 deletions
  1. gpu/gpu.go (+7 -0)
  2. llm/server.go (+24 -15)
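
The fix hinges on search order: the dynamic loader walks LD_LIBRARY_PATH (PATH on Windows) left to right, so whichever CUDA libraries appear first win. A minimal standalone sketch of that ordering, with a hypothetical runner directory standing in for the real install layout:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"strings"
)

func main() {
	// Hypothetical runner directory; the real path comes from the install layout.
	dir := "/tmp/ollama/runners/cuda_v11"

	pathEnv := "LD_LIBRARY_PATH"
	if runtime.GOOS == "windows" {
		pathEnv = "PATH"
	}

	// Bundled libraries go first so the loader resolves them before system copies.
	paths := []string{dir}
	if existing, ok := os.LookupEnv(pathEnv); ok {
		paths = append(paths, filepath.SplitList(existing)...)
	}
	fmt.Println(pathEnv + "=" + strings.Join(paths, string(filepath.ListSeparator)))
}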

+ 7 - 0
gpu/gpu.go

@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
 
+	// On windows we bundle the nvidia library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 		gpuInfo.Major = int(memInfo.major)
 		gpuInfo.Minor = int(memInfo.minor)
 		gpuInfo.MinimumMemory = cudaMinimumMemory
+		gpuInfo.DependencyPath = depPath
 
 		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 		resp = append(resp, gpuInfo)
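
For illustration, a standalone sketch of the DependencyPath derivation above, with a hypothetical install path in place of envconfig.RunnersDir:

package main

import (
	"fmt"
	"path/filepath"
	"runtime"
)

func main() {
	// Hypothetical value; the real code reads envconfig.RunnersDir.
	runnersDir := `C:\Program Files\Ollama\runners`

	depPath := ""
	if runtime.GOOS == "windows" && runnersDir != "" {
		// The bundled NVIDIA DLLs live one level above the runner dir.
		depPath = filepath.Dir(runnersDir)
	}
	fmt.Println(depPath) // on Windows: C:\Program Files\Ollama
}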

+ 24 - 15
llm/server.go

@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		if runtime.GOOS == "windows" {
 			pathEnv = "PATH"
 		}
-		// append the server directory to LD_LIBRARY_PATH/PATH
+		// prepend the server directory to LD_LIBRARY_PATH/PATH
 		libraryPaths := []string{dir}
 
 		if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 			// Append our runner directory to the path
 			// This will favor system libraries over our bundled library dependencies
-			libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 		}
 
 		// Note: we always put the dependency path first
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			sem:           semaphore.NewWeighted(int64(numParallel)),
 		}
 
-		libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-		s.cmd.Env = append(os.Environ(), libEnv)
+		s.cmd.Env = os.Environ()
 		s.cmd.Stdout = os.Stdout
 		s.cmd.Stderr = s.status
 
-		// TODO - multiple GPU selection logic...
-		key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-		if key != "" {
-			s.cmd.Env = append(s.cmd.Env, key+"="+val)
+		visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+		pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+		// Update or add the path and visible devices variable with our adjusted version
+		pathNeeded := true
+		devicesNeeded := visibleDevicesEnv != ""
+		for i := range s.cmd.Env {
+			cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+			if strings.EqualFold(cmp[0], pathEnv) {
+				s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+				pathNeeded = false
+			} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+				s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+				devicesNeeded = false
+			}
+		}
+		if pathNeeded {
+			s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+		}
+		if devicesNeeded {
+			s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
 		}
 
 		slog.Info("starting llama server", "cmd", s.cmd.String())
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}
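
The loop in the second hunk replaces the old blind append: if the variable already exists in the inherited environment, it is overwritten in place, matched case-insensitively since Windows treats Path and PATH as the same variable; otherwise it is appended. A minimal standalone sketch of that update-or-append pattern, using a hypothetical setEnv helper:

package main

import (
	"fmt"
	"strings"
)

// setEnv overrides key in env case-insensitively, or appends it if absent.
func setEnv(env []string, key, val string) []string {
	for i := range env {
		kv := strings.SplitN(env[i], "=", 2)
		if strings.EqualFold(kv[0], key) {
			env[i] = key + "=" + val // update the existing entry in place
			return env
		}
	}
	return append(env, key+"="+val) // not present: add it
}

func main() {
	env := []string{`Path=C:\Windows`, `HOME=C:\Users\me`}
	env = setEnv(env, "PATH", `C:\Program Files\Ollama;C:\Windows`)
	fmt.Println(env) // the Path entry is replaced, not duplicated
}

Overwriting in place rather than appending matters because with a duplicated variable it is unspecified which copy the child process sees, so the stale system path could still win.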