Browse Source

Use our libraries first

Trying to live off the land for cuda libraries was not the right strategy.  We need to use the version we compiled against to ensure things work properly
Daniel Hiltgen 1 năm trước cách đây
mục cha
commit
380378cc80
2 tập tin đã thay đổi với 31 bổ sung15 xóa
  1. 7 0
      gpu/gpu.go
  2. 24 15
      llm/server.go

+ 7 - 0
gpu/gpu.go

@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
 
+	// On windows we bundle the nvidia library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 		gpuInfo.Major = int(memInfo.major)
 		gpuInfo.Minor = int(memInfo.minor)
 		gpuInfo.MinimumMemory = cudaMinimumMemory
+		gpuInfo.DependencyPath = depPath
 
 		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 		resp = append(resp, gpuInfo)

+ 24 - 15
llm/server.go

@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		if runtime.GOOS == "windows" {
 			pathEnv = "PATH"
 		}
-		// append the server directory to LD_LIBRARY_PATH/PATH
+		// prepend the server directory to LD_LIBRARY_PATH/PATH
 		libraryPaths := []string{dir}
 
 		if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 			// Append our runner directory to the path
 			// This will favor system libraries over our bundled library dependencies
-			libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 		}
 
 		// Note: we always put the dependency path first
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			sem:           semaphore.NewWeighted(int64(numParallel)),
 		}
 
-		libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-		s.cmd.Env = append(os.Environ(), libEnv)
+		s.cmd.Env = os.Environ()
 		s.cmd.Stdout = os.Stdout
 		s.cmd.Stderr = s.status
 
-		// TODO - multiple GPU selection logic...
-		key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-		if key != "" {
-			s.cmd.Env = append(s.cmd.Env, key+"="+val)
+		visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+		pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+		// Update or add the path and visible devices variable with our adjusted version
+		pathNeeded := true
+		devicesNeeded := visibleDevicesEnv != ""
+		for i := range s.cmd.Env {
+			cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+			if strings.EqualFold(cmp[0], pathEnv) {
+				s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+				pathNeeded = false
+			} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+				s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+				devicesNeeded = false
+			}
+		}
+		if pathNeeded {
+			s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+		}
+		if devicesNeeded {
+			s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
 		}
 
 		slog.Info("starting llama server", "cmd", s.cmd.String())
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}