浏览代码

Use our libraries first

Trying to live off the land for CUDA libraries was not the right strategy. We need to use the version we compiled against to ensure things work properly.
Daniel Hiltgen 1 年之前
父节点
当前提交
380378cc80
共有 2 个文件被更改,包括 31 次插入和 15 次删除
  1. 7 0
      gpu/gpu.go
  2. 24 15
      llm/server.go

+ 7 - 0
gpu/gpu.go

@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
 
+	// On Windows we bundle the NVIDIA library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 		gpuInfo.Major = int(memInfo.major)
 		gpuInfo.Minor = int(memInfo.minor)
 		gpuInfo.MinimumMemory = cudaMinimumMemory
+		gpuInfo.DependencyPath = depPath
 
 		// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 		resp = append(resp, gpuInfo)

+ 24 - 15
llm/server.go

@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		if runtime.GOOS == "windows" {
 			pathEnv = "PATH"
 		}
-		// append the server directory to LD_LIBRARY_PATH/PATH
+		// prepend the server directory to LD_LIBRARY_PATH/PATH
 		libraryPaths := []string{dir}
 
 		if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 			// Prepend our runner directory to the path
 			// This will favor our bundled library dependencies over system libraries
-			libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+			libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 		}
 
 		// Note: we always put the dependency path first
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			sem:           semaphore.NewWeighted(int64(numParallel)),
 		}
 
-		libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-		s.cmd.Env = append(os.Environ(), libEnv)
+		s.cmd.Env = os.Environ()
 		s.cmd.Stdout = os.Stdout
 		s.cmd.Stderr = s.status
 
-		// TODO - multiple GPU selection logic...
-		key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-		if key != "" {
-			s.cmd.Env = append(s.cmd.Env, key+"="+val)
+		visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+		pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+		// Update or add the path and visible devices variable with our adjusted version
+		pathNeeded := true
+		devicesNeeded := visibleDevicesEnv != ""
+		for i := range s.cmd.Env {
+			cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+			if strings.EqualFold(cmp[0], pathEnv) {
+				s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+				pathNeeded = false
+			} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+				s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+				devicesNeeded = false
+			}
+		}
+		if pathNeeded {
+			s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+		}
+		if devicesNeeded {
+			s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
 		}
 
 		slog.Info("starting llama server", "cmd", s.cmd.String())
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}