8 kuukautta sitten · f9e31da946
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -183,10 +183,17 @@ jobs:
 
				           name: windows-rocm-deps
			
 
				           path: dist/deps/*
			
 
				 
			
 
				-  # CUDA v11 generation step
			
 
				-  generate-windows-cuda-v11:
			
 
				+  # CUDA generation step
			
 
				+  generate-windows-cuda:
			
 
				     environment: release
			
 
				     runs-on: windows
			
 
				+    strategy:
			
 
				+      matrix:
			
 
				+        cuda:
			
 
				+          - version: "11"
			
 
				+            url: 'https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe'
			
 
				+          - version: "12"
			
 
				+            url: 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe'
			
 
				     env:
			
 
				       KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
			
 
				     steps:
			
@@ -220,11 +227,11 @@ jobs:
 
				         with:
			
 
				           go-version-file: go.mod
			
 
				           cache: true
			
 
				-      - name: 'Install CUDA'
			
 
				+      - name: 'Install CUDA ${{ matrix.cuda.version }}'
			
 
				         run: |
			
 
				           $ErrorActionPreference = "Stop"
			
 
				           write-host "downloading CUDA Installer"
			
 
				-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
			
 
				+          Invoke-WebRequest -Uri "${{ matrix.cuda.url }}" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
			
 
				           write-host "Installing CUDA"
			
 
				           Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
			
 
				           write-host "Completed CUDA"
			
@@ -256,7 +263,7 @@ jobs:
 
				           cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				-          name: generate-windows-cuda-v11
			
 
				+          name: generate-windows-cuda-${{ matrix.cuda.version }}
			
 
				           path: |
			
 
				             llm/build/**/bin/*
			
 
				             dist/windows-amd64/**
			
@@ -265,95 +272,13 @@ jobs:
 
				           name: windows-cuda-deps
			
 
				           path: dist/deps/*
			
 
				 
			
 
				-  # CUDA v12 generation step
			
 
				-  generate-windows-cuda-v12:
			
 
				-    environment: release
			
 
				-    runs-on: windows
			
 
				-    env:
			
 
				-      KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
			
 
				-    steps:
			
 
				-      - uses: actions/checkout@v4
			
 
				-      - name: Set Version
			
 
				-        shell: bash
			
 
				-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
			
 
				-      - uses: 'google-github-actions/auth@v2'
			
 
				-        with:
			
 
				-          project_id: 'ollama'
			
 
				-          credentials_json: '${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}'
			
 
				-      - run: echo "${{ vars.OLLAMA_CERT }}" > ollama_inc.crt
			
 
				-      - name: install Windows SDK 8.1 to get signtool
			
 
				-        run: |
			
 
				-          $ErrorActionPreference = "Stop"
			
 
				-          write-host "downloading SDK"
			
 
				-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${env:RUNNER_TEMP}\sdksetup.exe"
			
 
				-          Start-Process "${env:RUNNER_TEMP}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
			
 
				-          write-host "Win SDK 8.1 installed"
			
 
				-          gci -path 'C:\Program Files (x86)\Windows Kits\' -r -fi 'signtool.exe'
			
 
				-      - name: install signing plugin
			
 
				-        run: |
			
 
				-          $ErrorActionPreference = "Stop"
			
 
				-          write-host "downloading plugin"
			
 
				-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${env:RUNNER_TEMP}\plugin.zip"
			
 
				-          Expand-Archive -Path "${env:RUNNER_TEMP}\plugin.zip" -DestinationPath ${env:RUNNER_TEMP}\plugin\
			
 
				-          write-host "Installing plugin"
			
 
				-          & "${env:RUNNER_TEMP}\plugin\*\kmscng.msi" /quiet
			
 
				-          write-host "plugin installed"
			
 
				-      - uses: actions/setup-go@v5
			
 
				-        with:
			
 
				-          go-version-file: go.mod
			
 
				-          cache: true
			
 
				-      - name: 'Install CUDA'
			
 
				-        run: |
			
 
				-          $ErrorActionPreference = "Stop"
			
 
				-          write-host "downloading CUDA Installer"
			
 
				-          Invoke-WebRequest -Uri "https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe" -OutFile "${env:RUNNER_TEMP}\cuda-install.exe"
			
 
				-          write-host "Installing CUDA"
			
 
				-          Start-Process "${env:RUNNER_TEMP}\cuda-install.exe" -ArgumentList '-s' -NoNewWindow -Wait
			
 
				-          write-host "Completed CUDA"
			
 
				-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
			
 
				-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' 
			
 
				-          echo "$cudaPath\bin" >> $env:GITHUB_PATH
			
 
				-          echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV
			
 
				-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV
			
 
				-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV
			
 
				-      - name: 'Verify CUDA'
			
 
				-        run: nvcc -V
			
 
				-      - run: go get ./...
			
 
				-      - name: go generate
			
 
				-        run: |
			
 
				-          $gopath=(get-command go).source | split-path -parent
			
 
				-          $cudabin=(get-command nvcc).source | split-path
			
 
				-          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
			
 
				-          cd $env:GITHUB_WORKSPACE
			
 
				-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
			
 
				-          $env:PATH="$gopath;$cudabin;$env:PATH"
			
 
				-          $env:OLLAMA_SKIP_CPU_GENERATE="1"
			
 
				-          go generate -x ./...
			
 
				-      - name: 'gather cuda dependencies'
			
 
				-        run: |
			
 
				-          $NVIDIA_DIR=(resolve-path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*\bin\')[0]
			
 
				-          md "dist\deps"
			
 
				-          cp "${NVIDIA_DIR}\cudart64_*.dll" "dist\deps\"
			
 
				-          cp "${NVIDIA_DIR}\cublas64_*.dll" "dist\deps\"
			
 
				-          cp "${NVIDIA_DIR}\cublasLt64_*.dll" "dist\deps\"
			
 
				-      - uses: actions/upload-artifact@v4
			
 
				-        with:
			
 
				-          name: generate-windows-cuda-v12
			
 
				-          path: |
			
 
				-            llm/build/**/bin/*
			
 
				-            dist/windows-amd64/**
			
 
				-      - uses: actions/upload-artifact@v4
			
 
				-        with:
			
 
				-          name: windows-cuda-deps
			
 
				-          path: dist/deps/*
			
 
				 
			
 
				   # Import the prior generation steps and build the final windows assets
			
 
				   build-windows:
			
 
				     environment: release
			
 
				     runs-on: windows
			
 
				     needs:
			
 
				-      - generate-windows-cuda-v11
			
 
				-      - generate-windows-cuda-v12
			
 
				+      - generate-windows-cuda
			
 
				       - generate-windows-rocm
			
 
				       - generate-windows-cpu
			
 
				     env:
			
@@ -397,7 +322,10 @@ jobs:
 
				           name: generate-windows-cpu
			
 
				       - uses: actions/download-artifact@v4
			
 
				         with:
			
 
				-          name: generate-windows-cuda-v11
			
 
				+          name: generate-windows-cuda-11
			
 
				+      - uses: actions/download-artifact@v4
			
 
				+        with:
			
 
				+          name: generate-windows-cuda-12
			
 
				       - uses: actions/download-artifact@v4
			
 
				         with:
			
 
				           name: windows-cuda-deps
			
--- a/docs/linux.md
+++ b/docs/linux.md
@@ -20,12 +20,12 @@ GPU.
 
				 
			
 
				 ## Manual install
			
 
				 
			
 
				-### Download the `ollama` tar file
			
 
				+### Download `ollama`
			
 
				 
			
 
				-Ollama is distributed as a tar file including GPU library dependencies.
			
 
				+Download and extract the Linux package:
			
 
				 
			
 
				 ```bash
			
 
				-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
			
 
				+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
			
 
				 ```
			
 
				 
			
 
				 ### Adding Ollama as a startup service (recommended)
			
@@ -95,7 +95,7 @@ curl -fsSL https://ollama.com/install.sh | sh
 
				 Or by downloading the ollama binary:
			
 
				 
			
 
				 ```bash
			
 
				-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar -C /usr -zxf -
			
 
				+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | sudo tar zx -C /usr
			
 
				 ```
			
 
				 
			
 
				 ## Installing specific versions
			
--- a/gpu/cuda_common.go
+++ b/gpu/cuda_common.go
@@ -28,7 +28,7 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
 
				 	return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
			
 
				 }
			
 
				 
			
 
				-func cudaGetVariant(gpuInfo CudaGPUInfo) string {
			
 
				+func cudaVariant(gpuInfo CudaGPUInfo) string {
			
 
				 	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
			
 
				 		if CudaTegra != "" {
			
 
				 			ver := strings.Split(CudaTegra, ".")
			
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -225,7 +225,7 @@ func GetGPUInfo() GpuInfoList {
 
				 			return GpuInfoList{cpus[0].GpuInfo}
			
 
				 		}
			
 
				 
			
 
				-		depPath := GetDepDir()
			
 
				+		depPath := LibraryDir()
			
 
				 
			
 
				 		// Load ALL libraries
			
 
				 		cHandles = initCudaHandles()
			
@@ -264,20 +264,20 @@ func GetGPUInfo() GpuInfoList {
 
				 				gpuInfo.computeMajor = int(memInfo.major)
			
 
				 				gpuInfo.computeMinor = int(memInfo.minor)
			
 
				 				gpuInfo.MinimumMemory = cudaMinimumMemory
			
 
				-				cudaVariant := cudaGetVariant(gpuInfo)
			
 
				+				variant := cudaVariant(gpuInfo)
			
 
				 				if depPath != "" {
			
 
				 					gpuInfo.DependencyPath = depPath
			
 
				 					// Check for variant specific directory
			
 
				-					if cudaVariant != "" {
			
 
				-						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+cudaVariant)); err == nil {
			
 
				-							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+cudaVariant)
			
 
				+					if variant != "" {
			
 
				+						if _, err := os.Stat(filepath.Join(depPath, "cuda_"+variant)); err == nil {
			
 
				+							gpuInfo.DependencyPath = filepath.Join(depPath, "cuda_"+variant)
			
 
				 						}
			
 
				 					}
			
 
				 				}
			
 
				 				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
			
 
				 				gpuInfo.DriverMajor = driverMajor
			
 
				 				gpuInfo.DriverMinor = driverMinor
			
 
				-				gpuInfo.Variant = cudaGetVariant(gpuInfo)
			
 
				+				gpuInfo.Variant = variant
			
 
				 
			
 
				 				// query the management library as well so we can record any skew between the two
			
 
				 				// which represents overhead on the GPU we must set aside on subsequent updates
			
@@ -468,7 +468,7 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 
				 	slog.Debug("Searching for GPU library", "name", baseLibName)
			
 
				 
			
 
				 	// Start with our bundled libraries
			
 
				-	patterns := []string{filepath.Join(GetDepDir(), baseLibName)}
			
 
				+	patterns := []string{filepath.Join(LibraryDir(), baseLibName)}
			
 
				 
			
 
				 	switch runtime.GOOS {
			
 
				 	case "windows":
			
@@ -642,7 +642,7 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-func GetDepDir() string {
			
 
				+func LibraryDir() string {
			
 
				 	// On Windows/linux we bundle the dependencies at the same level as the executable
			
 
				 	appExe, err := os.Executable()
			
 
				 	if err != nil {
			
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -117,7 +117,7 @@ function build {
 
				     if ($cmakeDefs -contains "-G") {
			
 
				         $extra=@("-j8")
			
 
				     } else {
			
 
				-        $extra= @("--", "/p:CL_MPcount=8")
			
 
				+        $extra= @("--", "/maxCpuCount:8")
			
 
				     }
			
 
				     write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
			
 
				     & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
			
@@ -273,7 +273,7 @@ function build_cuda() {
 
				             "-DGGML_CUDA=ON",
			
 
				             "-DGGML_AVX=on",
			
 
				             "-DGGML_AVX2=off",
			
 
				-            "-DCMAKE_CUDA_FLAGS=-t8",
			
 
				+            "-DCMAKE_CUDA_FLAGS=-t6",
			
 
				             "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}",
			
 
				             "-DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=$env:CUDA_PATH"
			
 
				             )