
Merge pull request #2885 from dhiltgen/rocm_v6_only

Revamp ROCm support
Daniel Hiltgen 1 year ago
parent
commit
3c8df3808b

+ 17 - 7
.github/workflows/test.yaml

@@ -11,12 +11,12 @@ jobs:
   generate:
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-2019]
         arch: [amd64, arm64]
         exclude:
           - os: ubuntu-latest
             arch: arm64
-          - os: windows-latest
+          - os: windows-2019
             arch: arm64
     runs-on: ${{ matrix.os }}
     env:
@@ -28,7 +28,18 @@ jobs:
           go-version: '1.22'
           cache: true
       - run: go get ./...
+      - run: |
+          $gopath=(get-command go).source | split-path -parent
+          & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
+          cd $env:GITHUB_WORKSPACE
+          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
+          $env:PATH="$gopath;$env:PATH"
+          go generate -x ./...
+        if: ${{ startsWith(matrix.os, 'windows-') }}
+        name: "Windows Go Generate"
       - run: go generate -x ./...
+        if: ${{ ! startsWith(matrix.os, 'windows-') }}
+        name: "Unix Go Generate"
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -66,7 +77,6 @@ jobs:
     strategy:
       matrix:
         rocm-version:
-          - '5.7.1'
           - '6.0'
     runs-on: linux
     container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
@@ -95,12 +105,12 @@ jobs:
   lint:
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-2019]
         arch: [amd64, arm64]
         exclude:
           - os: ubuntu-latest
             arch: arm64
-          - os: windows-latest
+          - os: windows-2019
             arch: arm64
           - os: macos-latest
             arch: amd64
@@ -134,12 +144,12 @@ jobs:
     needs: generate
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest, windows-2019]
         arch: [amd64]
         exclude:
           - os: ubuntu-latest
             arch: arm64
-          - os: windows-latest
+          - os: windows-2019
             arch: arm64
     runs-on: ${{ matrix.os }}
     env:

+ 12 - 15
Dockerfile

@@ -1,6 +1,7 @@
 ARG GOLANG_VERSION=1.22.1
 ARG CMAKE_VERSION=3.22.1
 ARG CUDA_VERSION=11.3.1
+ARG ROCM_VERSION=6.0
 
 # Copy the minimal context we need to run the generate scripts
 FROM scratch AS llm-code
@@ -28,7 +29,7 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
 ARG CGO_CFLAGS
 RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
-FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS rocm-build-amd64
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -39,18 +40,14 @@ WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
 ARG CGO_CFLAGS
 ARG AMDGPU_TARGETS
 RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
+RUN mkdir /tmp/scratch && \
+    for dep in $(cat /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/x86_64/rocm*/lib/deps.txt) ; do \
+        cp ${dep} /tmp/scratch/ || exit 1 ; \
+    done && \
+    (cd /opt/rocm/lib && tar cf - rocblas/library) | (cd /tmp/scratch/ && tar xf - ) && \
+    mkdir -p /go/src/github.com/jmorganca/ollama/dist/deps/ && \
+    (cd /tmp/scratch/ && tar czvf /go/src/github.com/jmorganca/ollama/dist/deps/rocm-amd64-deps.tgz . )
 
-FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
-ARG CMAKE_VERSION
-COPY ./scripts/rh_linux_deps.sh /
-RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
-ENV LIBRARY_PATH /opt/amdgpu/lib64
-COPY --from=llm-code / /go/src/github.com/jmorganca/ollama/
-WORKDIR /go/src/github.com/jmorganca/ollama/llm/generate
-ARG CGO_CFLAGS
-ARG AMDGPU_TARGETS
-RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 FROM --platform=linux/amd64 centos:7 AS cpu-builder-amd64
 ARG CMAKE_VERSION
@@ -91,8 +88,8 @@ COPY . .
 COPY --from=cpu_avx-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=cpu_avx2-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=rocm-5-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
-COPY --from=rocm-6-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/deps/
 ARG GOFLAGS
 ARG CGO_CFLAGS
 RUN go build .
@@ -117,7 +114,7 @@ RUN apt-get update && apt-get install -y ca-certificates
 COPY --from=build-arm64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 
 # Radeon images are much larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete as runtime-rocm
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete as runtime-rocm
 RUN update-pciids
 COPY --from=build-amd64 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
 EXPOSE 11434

+ 8 - 0
app/ollama.iss

@@ -91,6 +91,14 @@ Source: "..\ollama.exe"; DestDir: "{app}"; Flags: ignoreversion 64bit
 Source: "..\dist\windeps\*.dll"; DestDir: "{app}"; Flags: ignoreversion 64bit
 Source: "..\dist\ollama_welcome.ps1"; DestDir: "{app}"; Flags: ignoreversion
 Source: ".\assets\app.ico"; DestDir: "{app}"; Flags: ignoreversion
+; Assumes v5.7, may need adjustments for v6
+#if GetEnv("HIP_PATH") != ""
+  Source: "{#GetEnv('HIP_PATH')}\bin\hipblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
+  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas.dll"; DestDir: "{app}\rocm\"; Flags: ignoreversion
+  ; amdhip64.dll dependency comes from the driver and must be installed already
+  Source: "{#GetEnv('HIP_PATH')}\bin\rocblas\library\*"; DestDir: "{app}\rocm\rocblas\library\"; Flags: ignoreversion
+#endif
+
 
 [Icons]
 Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; IconFilename: "{app}\app.ico"

+ 2 - 2
docs/development.md

@@ -116,7 +116,7 @@ Note: The windows build for Ollama is still under development.
 
 Install required tools:
 
-- MSVC toolchain - C/C++ and cmake as minimal requirements
+- MSVC toolchain - C/C++ and cmake as minimal requirements - You must build from a "Developer Shell" with the environment variables set
 - go version 1.22 or higher
 - MinGW (pick one variant) with GCC.
   - <https://www.mingw-w64.org/>
@@ -132,6 +132,6 @@ go build .
 
 #### Windows CUDA (NVIDIA)
 
-In addition to the common Windows development tools described above, install:
+In addition to the common Windows development tools described above, install CUDA **AFTER** you install MSVC.
 
 - [NVIDIA CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html)

+ 8 - 0
docs/linux.md

@@ -10,6 +10,14 @@ Install Ollama running this one-liner:
 curl -fsSL https://ollama.com/install.sh | sh
 ```
 
+## AMD Radeon GPU support
+
+While AMD has contributed the `amdgpu` driver upstream to the official Linux
+kernel source, the version is older and may not support all ROCm features. We
+recommend you install the latest driver from
+https://www.amd.com/en/support/linux-drivers for best support of your Radeon
+GPU.
+
 ## Manual install
 
 ### Download the `ollama` binary

+ 37 - 0
docs/troubleshooting.md

@@ -67,6 +67,43 @@ You can see what features your CPU has with the following.
 cat /proc/cpuinfo| grep flags  | head -1
 ```
 
+## AMD Radeon GPU Support
+
+Ollama leverages the AMD ROCm library, which does not support all AMD GPUs. In
+some cases you can force the system to try to use a close GPU type. For example,
+the Radeon RX 5400 is `gfx1034` (also known as 10.3.4); however, ROCm does not
+currently support this patch level, and the closest supported type is `gfx1030`. You can use the
+environment variable `HSA_OVERRIDE_GFX_VERSION` with `x.y.z` syntax. For
+example, to force the system to run on the RX 5400, you would set
+`HSA_OVERRIDE_GFX_VERSION="10.3.0"` as an environment variable for the server.
+
+At this time, the known supported GPU types are the following (this list may change
+from release to release):
+- gfx900
+- gfx906
+- gfx908
+- gfx90a
+- gfx940
+- gfx941
+- gfx942
+- gfx1030
+- gfx1100
+- gfx1101
+- gfx1102
+
+This will not work for all unsupported GPUs.  Reach out on [Discord](https://discord.gg/ollama)
+or file an [issue](https://github.com/ollama/ollama/issues) for additional help.
+
+
+## Installing older versions on Linux
+
+If you run into problems on Linux and want to install an older version, you can tell the install script
+which version to install.
+
+```sh
+curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.27" sh
+```
+
 ## Known issues
 
 * N/A

+ 2 - 1
docs/windows.md

@@ -4,7 +4,7 @@ Welcome to the Ollama Windows preview.
 
 No more WSL required!
 
-Ollama now runs as a native Windows application, including NVIDIA GPU support.
+Ollama now runs as a native Windows application, including NVIDIA and AMD Radeon GPU support.
 After installing Ollama Windows Preview, Ollama will run in the background and
 the `ollama` command line is available in `cmd`, `powershell` or your favorite
 terminal application. As usual the Ollama [api](./api.md) will be served on
@@ -21,6 +21,7 @@ Logs will often be helpful in dianosing the problem (see
 
 * Windows 10 or newer, Home or Pro
 * NVIDIA 452.39 or newer Drivers if you have an NVIDIA card
+* AMD Radeon Driver https://www.amd.com/en/support if you have a Radeon card
 
 ## API Access
 

+ 0 - 101
gpu/amd.go

@@ -1,101 +0,0 @@
-package gpu
-
-import (
-	"bufio"
-	"errors"
-	"fmt"
-	"io"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"strconv"
-	"strings"
-)
-
-// TODO - windows vs. non-windows vs darwin
-
-// Discovery logic for AMD/ROCm GPUs
-
-const (
-	DriverVersionFile     = "/sys/module/amdgpu/version"
-	GPUPropertiesFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/properties"
-	// TODO probably break these down per GPU to make the logic simpler
-	GPUTotalMemoryFileGlob = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/properties" // size_in_bytes line
-	GPUUsedMemoryFileGlob  = "/sys/class/kfd/kfd/topology/nodes/*/mem_banks/*/used_memory"
-)
-
-func AMDDetected() bool {
-	// Some driver versions (older?) don't have a version file, so just lookup the parent dir
-	sysfsDir := filepath.Dir(DriverVersionFile)
-	_, err := os.Stat(sysfsDir)
-	if errors.Is(err, os.ErrNotExist) {
-		slog.Debug("amd driver not detected " + sysfsDir)
-		return false
-	} else if err != nil {
-		slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err))
-		return false
-	}
-	return true
-}
-
-func AMDDriverVersion() (string, error) {
-	_, err := os.Stat(DriverVersionFile)
-	if err != nil {
-		return "", fmt.Errorf("amdgpu file stat error: %s %w", DriverVersionFile, err)
-	}
-	fp, err := os.Open(DriverVersionFile)
-	if err != nil {
-		return "", err
-	}
-	defer fp.Close()
-	verString, err := io.ReadAll(fp)
-	if err != nil {
-		return "", err
-	}
-	return strings.TrimSpace(string(verString)), nil
-}
-
-func AMDGFXVersions() []Version {
-	res := []Version{}
-	matches, _ := filepath.Glob(GPUPropertiesFileGlob)
-	for _, match := range matches {
-		fp, err := os.Open(match)
-		if err != nil {
-			slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
-			continue
-		}
-		defer fp.Close()
-
-		scanner := bufio.NewScanner(fp)
-		// optionally, resize scanner's capacity for lines over 64K, see next example
-		for scanner.Scan() {
-			line := strings.TrimSpace(scanner.Text())
-			if strings.HasPrefix(line, "gfx_target_version") {
-				ver := strings.Fields(line)
-				if len(ver) != 2 || len(ver[1]) < 5 {
-					slog.Debug("malformed " + line)
-					continue
-				}
-				l := len(ver[1])
-				patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
-				minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
-				major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
-				if err1 != nil || err2 != nil || err3 != nil {
-					slog.Debug("malformed int " + line)
-					continue
-				}
-
-				res = append(res, Version{
-					Major: uint(major),
-					Minor: uint(minor),
-					Patch: uint(patch),
-				})
-			}
-		}
-	}
-	return res
-}
-
-func (v Version) ToGFXString() string {
-	return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
-}

+ 58 - 0
gpu/amd_common.go

@@ -0,0 +1,58 @@
+//go:build linux || windows
+
+package gpu
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+)
+
+// Determine if the given ROCm lib directory is usable by checking for existence of some glob patterns
+func rocmLibUsable(libDir string) bool {
+	slog.Debug("evaluating potential rocm lib dir " + libDir)
+	for _, g := range ROCmLibGlobs {
+		res, _ := filepath.Glob(filepath.Join(libDir, g))
+		if len(res) == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+func GetSupportedGFX(libDir string) ([]string, error) {
+	var ret []string
+	files, err := filepath.Glob(filepath.Join(libDir, "rocblas", "library", "TensileLibrary_lazy_gfx*.dat"))
+	if err != nil {
+		return nil, err
+	}
+	for _, file := range files {
+		ret = append(ret, strings.TrimSuffix(strings.TrimPrefix(filepath.Base(file), "TensileLibrary_lazy_"), ".dat"))
+	}
+	return ret, nil
+}
+
+func amdSetVisibleDevices(ids []int, skip map[int]interface{}) {
+	// Set the visible devices if not already set
+	// TODO - does sort order matter?
+	devices := []string{}
+	for i := range ids {
+		slog.Debug(fmt.Sprintf("i=%d", i))
+		if _, skipped := skip[i]; skipped {
+			slog.Debug("skipped")
+			continue
+		}
+		devices = append(devices, strconv.Itoa(i))
+	}
+	slog.Debug(fmt.Sprintf("devices=%v", devices))
+
+	val := strings.Join(devices, ",")
+	err := os.Setenv("HIP_VISIBLE_DEVICES", val)
+	if err != nil {
+		slog.Warn(fmt.Sprintf("failed to set env: %s", err))
+	}
+	slog.Debug("HIP_VISIBLE_DEVICES=" + val)
+}
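
As a rough illustration of how the helpers above fit together, the sketch below (not part of this change, Linux or Windows only) probes a ROCm library directory for the gfx targets its bundled rocBLAS kernels support. The `/opt/rocm/lib` path is an assumption; in the real flow `AMDValidateLibDir` resolves the directory.

```go
package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/gpu"
)

func main() {
	// Assumed location of a host ROCm install; AMDValidateLibDir would
	// normally resolve this (or the bundled payload) instead.
	libDir := "/opt/rocm/lib"
	supported, err := gpu.GetSupportedGFX(libDir)
	if err != nil {
		log.Fatal(err)
	}
	// Prints something like [gfx900 gfx906 ... gfx1102], depending on the rocBLAS build.
	fmt.Println(supported)
}
```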

+ 141 - 0
gpu/amd_hip_windows.go

@@ -0,0 +1,141 @@
+package gpu
+
+import (
+	"fmt"
+	"log/slog"
+	"strconv"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+)
+
+const (
+	hipSuccess       = 0
+	hipErrorNoDevice = 100
+)
+
+type hipDevicePropMinimal struct {
+	Name        [256]byte
+	unused1     [140]byte
+	GcnArchName [256]byte // gfx####
+	iGPU        int       // Doesn't seem to actually report correctly
+	unused2     [128]byte
+}
+
+// Wrap the amdhip64.dll library for GPU discovery
+type HipLib struct {
+	dll                    windows.Handle
+	hipGetDeviceCount      uintptr
+	hipGetDeviceProperties uintptr
+	hipMemGetInfo          uintptr
+	hipSetDevice           uintptr
+	hipDriverGetVersion    uintptr
+}
+
+func NewHipLib() (*HipLib, error) {
+	h, err := windows.LoadLibrary("amdhip64.dll")
+	if err != nil {
+		return nil, fmt.Errorf("unable to load amdhip64.dll: %w", err)
+	}
+	hl := &HipLib{}
+	hl.dll = h
+	hl.hipGetDeviceCount, err = windows.GetProcAddress(hl.dll, "hipGetDeviceCount")
+	if err != nil {
+		return nil, err
+	}
+	hl.hipGetDeviceProperties, err = windows.GetProcAddress(hl.dll, "hipGetDeviceProperties")
+	if err != nil {
+		return nil, err
+	}
+	hl.hipMemGetInfo, err = windows.GetProcAddress(hl.dll, "hipMemGetInfo")
+	if err != nil {
+		return nil, err
+	}
+	hl.hipSetDevice, err = windows.GetProcAddress(hl.dll, "hipSetDevice")
+	if err != nil {
+		return nil, err
+	}
+	hl.hipDriverGetVersion, err = windows.GetProcAddress(hl.dll, "hipDriverGetVersion")
+	if err != nil {
+		return nil, err
+	}
+	return hl, nil
+}
+
+// The hip library only evaluates the HIP_VISIBLE_DEVICES variable at startup
+// so we have to unload/reset the library after we do our initial discovery
+// to make sure our updates to that variable are processed by llama.cpp
+func (hl *HipLib) Release() {
+	err := windows.FreeLibrary(hl.dll)
+	if err != nil {
+		slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err))
+	}
+	hl.dll = 0
+}
+
+func (hl *HipLib) AMDDriverVersion() (string, error) {
+	if hl.dll == 0 {
+		return "", fmt.Errorf("dll has been unloaded")
+	}
+	var version int
+	status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
+	if status != hipSuccess {
+		return "", fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
+	}
+	return strconv.Itoa(version), nil
+}
+
+func (hl *HipLib) HipGetDeviceCount() int {
+	if hl.dll == 0 {
+		slog.Error("dll has been unloaded")
+		return 0
+	}
+	var count int
+	status, _, err := syscall.SyscallN(hl.hipGetDeviceCount, uintptr(unsafe.Pointer(&count)))
+	if status == hipErrorNoDevice {
+		slog.Info("AMD ROCm reports no devices found")
+		return 0
+	}
+	if status != hipSuccess {
+		slog.Warn(fmt.Sprintf("failed call to hipGetDeviceCount: %d %s", status, err))
+	}
+	return count
+}
+
+func (hl *HipLib) HipSetDevice(device int) error {
+	if hl.dll == 0 {
+		return fmt.Errorf("dll has been unloaded")
+	}
+	status, _, err := syscall.SyscallN(hl.hipSetDevice, uintptr(device))
+	if status != hipSuccess {
+		return fmt.Errorf("failed call to hipSetDevice: %d %s", status, err)
+	}
+	return nil
+}
+
+func (hl *HipLib) HipGetDeviceProperties(device int) (*hipDevicePropMinimal, error) {
+	if hl.dll == 0 {
+		return nil, fmt.Errorf("dll has been unloaded")
+	}
+	var props hipDevicePropMinimal
+	status, _, err := syscall.SyscallN(hl.hipGetDeviceProperties, uintptr(unsafe.Pointer(&props)), uintptr(device))
+	if status != hipSuccess {
+		return nil, fmt.Errorf("failed call to hipGetDeviceProperties: %d %s", status, err)
+	}
+	return &props, nil
+}
+
+// free, total, err
+func (hl *HipLib) HipMemGetInfo() (uint64, uint64, error) {
+	if hl.dll == 0 {
+		return 0, 0, fmt.Errorf("dll has been unloaded")
+	}
+	var totalMemory uint64
+	var freeMemory uint64
+	status, _, err := syscall.SyscallN(hl.hipMemGetInfo, uintptr(unsafe.Pointer(&freeMemory)), uintptr(unsafe.Pointer(&totalMemory)))
+	if status != hipSuccess {
+		return 0, 0, fmt.Errorf("failed call to hipMemGetInfo: %d %s", status, err)
+	}
+	return freeMemory, totalMemory, nil
+}
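
A minimal sketch (not part of this change) of how this wrapper can be exercised on its own. It is Windows only and assumes the AMD driver has installed `amdhip64.dll`; `AMDGetGPUInfo` in gpu/amd_windows.go is the real consumer.

```go
//go:build windows

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/gpu"
)

func main() {
	hl, err := gpu.NewHipLib()
	if err != nil {
		log.Fatal(err) // amdhip64.dll missing or symbols not found
	}
	// HIP latches HIP_VISIBLE_DEVICES at load time, so release the DLL when done.
	defer hl.Release()

	count := hl.HipGetDeviceCount()
	for i := 0; i < count; i++ {
		if err := hl.HipSetDevice(i); err != nil {
			continue
		}
		props, err := hl.HipGetDeviceProperties(i)
		if err != nil {
			continue
		}
		name := string(props.Name[:bytes.IndexByte(props.Name[:], 0)])
		free, total, err := hl.HipMemGetInfo()
		if err != nil {
			continue
		}
		fmt.Printf("[%d] %s: %d free / %d total bytes\n", i, name, free, total)
	}
}
```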

+ 411 - 0
gpu/amd_linux.go

@@ -0,0 +1,411 @@
+package gpu
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"slices"
+	"strconv"
+	"strings"
+
+	"github.com/jmorganca/ollama/version"
+)
+
+// Discovery logic for AMD/ROCm GPUs
+
+const (
+	curlMsg               = "curl -fsSL https://github.com/ollama/ollama/releases/download/v%s/rocm-amd64-deps.tgz | tar -zxf - -C %s"
+	DriverVersionFile     = "/sys/module/amdgpu/version"
+	AMDNodesSysfsDir      = "/sys/class/kfd/kfd/topology/nodes/"
+	GPUPropertiesFileGlob = AMDNodesSysfsDir + "*/properties"
+
+	// Prefix with the node dir
+	GPUTotalMemoryFileGlob = "mem_banks/*/properties" // size_in_bytes line
+	GPUUsedMemoryFileGlob  = "mem_banks/*/used_memory"
+	RocmStandardLocation   = "/opt/rocm/lib"
+)
+
+var (
+	// Used to validate if the given ROCm lib is usable
+	ROCmLibGlobs = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
+)
+
+// Gather GPU information from the amdgpu driver if any supported GPUs are detected
+// HIP_VISIBLE_DEVICES will be set if we detect a mix of unsupported and supported devices
+// and the user hasn't already set this variable
+func AMDGetGPUInfo(resp *GpuInfo) {
+	// TODO - DRY this out with windows
+	if !AMDDetected() {
+		return
+	}
+	skip := map[int]interface{}{}
+
+	// Opportunistic logging of driver version to aid in troubleshooting
+	ver, err := AMDDriverVersion()
+	if err == nil {
+		slog.Info("AMD Driver: " + ver)
+	} else {
+		// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
+		slog.Warn(fmt.Sprintf("ollama recommends running the https://www.amd.com/en/support/linux-drivers: %s", err))
+	}
+
+	// If the user has specified exactly which GPUs to use, look up their memory
+	visibleDevices := os.Getenv("HIP_VISIBLE_DEVICES")
+	if visibleDevices != "" {
+		ids := []int{}
+		for _, idStr := range strings.Split(visibleDevices, ",") {
+			id, err := strconv.Atoi(idStr)
+			if err != nil {
+				slog.Warn(fmt.Sprintf("malformed HIP_VISIBLE_DEVICES=%s %s", visibleDevices, err))
+			} else {
+				ids = append(ids, id)
+			}
+		}
+		amdProcMemLookup(resp, nil, ids)
+		return
+	}
+
+	// Gather GFX version information from all detected cards
+	gfx := AMDGFXVersions()
+	verStrings := []string{}
+	for i, v := range gfx {
+		verStrings = append(verStrings, v.ToGFXString())
+		if v.Major == 0 {
+			// Silently skip CPUs
+			skip[i] = struct{}{}
+			continue
+		}
+		if v.Major < 9 {
+			// TODO consider this a build-time setting if we can support 8xx family GPUs
+			slog.Warn(fmt.Sprintf("amdgpu [%d] too old %s", i, v.ToGFXString()))
+			skip[i] = struct{}{}
+		}
+	}
+	slog.Info(fmt.Sprintf("detected amdgpu versions %v", verStrings))
+
+	// Abort if all GPUs are skipped
+	if len(skip) >= len(gfx) {
+		slog.Info("all detected amdgpus are skipped, falling back to CPU")
+		return
+	}
+
+	// If we got this far, then we have at least 1 GPU that's a ROCm candidate, so make sure we have a lib
+	libDir, err := AMDValidateLibDir()
+	if err != nil {
+		slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err))
+		return
+	}
+
+	gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+	if gfxOverride == "" {
+		supported, err := GetSupportedGFX(libDir)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
+			return
+		}
+		slog.Debug(fmt.Sprintf("rocm supported GPU types %v", supported))
+
+		for i, v := range gfx {
+			if !slices.Contains[[]string, string](supported, v.ToGFXString()) {
+				slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, v.ToGFXString(), libDir, supported))
+				// TODO - consider discrete markdown just for ROCM troubleshooting?
+				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
+				skip[i] = struct{}{}
+			} else {
+				slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, v.ToGFXString()))
+			}
+		}
+	} else {
+		slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
+	}
+
+	if len(skip) >= len(gfx) {
+		slog.Info("all detected amdgpus are skipped, falling back to CPU")
+		return
+	}
+
+	ids := make([]int, len(gfx))
+	i := 0
+	for k := range gfx {
+		ids[i] = k
+		i++
+	}
+	amdProcMemLookup(resp, skip, ids)
+	if resp.memInfo.DeviceCount == 0 {
+		return
+	}
+	if len(skip) > 0 {
+		amdSetVisibleDevices(ids, skip)
+	}
+}
+
+// Walk the sysfs nodes for the available GPUs and gather information from them
+// skipping over any devices in the skip map
+func amdProcMemLookup(resp *GpuInfo, skip map[int]interface{}, ids []int) {
+	resp.memInfo.DeviceCount = 0
+	resp.memInfo.TotalMemory = 0
+	resp.memInfo.FreeMemory = 0
+	if len(ids) == 0 {
+		slog.Debug("discovering all amdgpu devices")
+		entries, err := os.ReadDir(AMDNodesSysfsDir)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("failed to read amdgpu sysfs %s - %s", AMDNodesSysfsDir, err))
+			return
+		}
+		for _, node := range entries {
+			if !node.IsDir() {
+				continue
+			}
+			id, err := strconv.Atoi(node.Name())
+			if err != nil {
+				slog.Warn("malformed amdgpu sysfs node id " + node.Name())
+				continue
+			}
+			ids = append(ids, id)
+		}
+	}
+	slog.Debug(fmt.Sprintf("discovering amdgpu devices %v", ids))
+
+	for _, id := range ids {
+		if _, skipped := skip[id]; skipped {
+			continue
+		}
+		totalMemory := uint64(0)
+		usedMemory := uint64(0)
+		propGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUTotalMemoryFileGlob)
+		propFiles, err := filepath.Glob(propGlob)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("error looking up total GPU memory: %s %s", propGlob, err))
+		}
+		// 1 or more memory banks - sum the values of all of them
+		for _, propFile := range propFiles {
+			fp, err := os.Open(propFile)
+			if err != nil {
+				slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", propFile, err))
+				continue
+			}
+			defer fp.Close()
+			scanner := bufio.NewScanner(fp)
+			for scanner.Scan() {
+				line := strings.TrimSpace(scanner.Text())
+				if strings.HasPrefix(line, "size_in_bytes") {
+					ver := strings.Fields(line)
+					if len(ver) != 2 {
+						slog.Warn("malformed " + line)
+						continue
+					}
+					bankSizeInBytes, err := strconv.ParseUint(ver[1], 10, 64)
+					if err != nil {
+						slog.Warn("malformed int " + line)
+						continue
+					}
+					totalMemory += bankSizeInBytes
+				}
+			}
+		}
+		if totalMemory == 0 {
+			continue
+		}
+		usedGlob := filepath.Join(AMDNodesSysfsDir, strconv.Itoa(id), GPUUsedMemoryFileGlob)
+		usedFiles, err := filepath.Glob(usedGlob)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("error looking up used GPU memory: %s %s", usedGlob, err))
+			continue
+		}
+		for _, usedFile := range usedFiles {
+			fp, err := os.Open(usedFile)
+			if err != nil {
+				slog.Warn(fmt.Sprintf("failed to open sysfs node file %s: %s", usedFile, err))
+				continue
+			}
+			defer fp.Close()
+			data, err := io.ReadAll(fp)
+			if err != nil {
+				slog.Warn(fmt.Sprintf("failed to read sysfs node file %s: %s", usedFile, err))
+				continue
+			}
+			used, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
+			if err != nil {
+				slog.Warn(fmt.Sprintf("malformed used memory %s: %s", string(data), err))
+				continue
+			}
+			usedMemory += used
+		}
+		slog.Info(fmt.Sprintf("[%d] amdgpu totalMemory %d", id, totalMemory))
+		slog.Info(fmt.Sprintf("[%d] amdgpu freeMemory  %d", id, (totalMemory - usedMemory)))
+		resp.memInfo.DeviceCount++
+		resp.memInfo.TotalMemory += totalMemory
+		resp.memInfo.FreeMemory += (totalMemory - usedMemory)
+	}
+	if resp.memInfo.DeviceCount > 0 {
+		resp.Library = "rocm"
+	}
+}
+
+// Quick check for AMD driver so we can skip amdgpu discovery if not present
+func AMDDetected() bool {
+	// Some driver versions (older?) don't have a version file, so just lookup the parent dir
+	sysfsDir := filepath.Dir(DriverVersionFile)
+	_, err := os.Stat(sysfsDir)
+	if errors.Is(err, os.ErrNotExist) {
+		slog.Debug("amdgpu driver not detected " + sysfsDir)
+		return false
+	} else if err != nil {
+		slog.Debug(fmt.Sprintf("error looking up amd driver %s %s", sysfsDir, err))
+		return false
+	}
+	return true
+}
+
+func setupLink(source, target string) error {
+	if err := os.RemoveAll(target); err != nil {
+		return fmt.Errorf("failed to remove old rocm directory %s %w", target, err)
+	}
+	if err := os.Symlink(source, target); err != nil {
+		return fmt.Errorf("failed to create link %s => %s %w", source, target, err)
+	}
+	slog.Debug(fmt.Sprintf("host rocm linked %s => %s", source, target))
+	return nil
+}
+
+// Ensure the AMD rocm lib dir is wired up
+// Prefer to use host installed ROCm, as long as it meets our minimum requirements
+// failing that, tell the user how to download it on their own
+func AMDValidateLibDir() (string, error) {
+	// We rely on the rpath compiled into our library to find rocm
+	// so we establish a symlink to wherever we find it on the system
+	// to $AssetsDir/rocm
+
+	// If we already have a rocm dependency wired, nothing more to do
+	assetsDir, err := AssetsDir()
+	if err != nil {
+		return "", fmt.Errorf("unable to lookup lib dir: %w", err)
+	}
+	// Versioned directory
+	rocmTargetDir := filepath.Join(assetsDir, "rocm")
+	if rocmLibUsable(rocmTargetDir) {
+		return rocmTargetDir, nil
+	}
+	// Parent dir (unversioned)
+	commonRocmDir := filepath.Join(filepath.Dir(assetsDir), "rocm")
+	if rocmLibUsable(commonRocmDir) {
+		return rocmTargetDir, setupLink(commonRocmDir, rocmTargetDir)
+	}
+
+	// Prefer explicit HIP env var
+	hipPath := os.Getenv("HIP_PATH")
+	if hipPath != "" {
+		hipLibDir := filepath.Join(hipPath, "lib")
+		if rocmLibUsable(hipLibDir) {
+			slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
+			return rocmTargetDir, setupLink(hipLibDir, rocmTargetDir)
+		}
+	}
+
+	// Scan the library path for potential matches
+	ldPaths := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
+	for _, ldPath := range ldPaths {
+		d, err := filepath.Abs(ldPath)
+		if err != nil {
+			continue
+		}
+		if rocmLibUsable(d) {
+			return rocmTargetDir, setupLink(d, rocmTargetDir)
+		}
+	}
+
+	// Well known location(s)
+	if rocmLibUsable("/opt/rocm/lib") {
+		return rocmTargetDir, setupLink("/opt/rocm/lib", rocmTargetDir)
+	}
+	err = os.MkdirAll(rocmTargetDir, 0755)
+	if err != nil {
+		return "", fmt.Errorf("failed to create empty rocm dir %s %w", rocmTargetDir, err)
+	}
+
+	// If we still haven't found a usable rocm, the user will have to download it on their own
+	slog.Warn("amdgpu detected, but no compatible rocm library found.  Either install rocm v6, or run the following")
+	slog.Warn(fmt.Sprintf(curlMsg, version.Version, rocmTargetDir))
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+}
+
+func AMDDriverVersion() (string, error) {
+	_, err := os.Stat(DriverVersionFile)
+	if err != nil {
+		return "", fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
+	}
+	fp, err := os.Open(DriverVersionFile)
+	if err != nil {
+		return "", err
+	}
+	defer fp.Close()
+	verString, err := io.ReadAll(fp)
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSpace(string(verString)), nil
+}
+
+func AMDGFXVersions() map[int]Version {
+	res := map[int]Version{}
+	matches, _ := filepath.Glob(GPUPropertiesFileGlob)
+	for _, match := range matches {
+		fp, err := os.Open(match)
+		if err != nil {
+			slog.Debug(fmt.Sprintf("failed to open sysfs node file %s: %s", match, err))
+			continue
+		}
+		defer fp.Close()
+		i, err := strconv.Atoi(filepath.Base(filepath.Dir(match)))
+		if err != nil {
+			slog.Debug(fmt.Sprintf("failed to parse node ID %s", err))
+			continue
+		}
+
+		scanner := bufio.NewScanner(fp)
+		for scanner.Scan() {
+			line := strings.TrimSpace(scanner.Text())
+			if strings.HasPrefix(line, "gfx_target_version") {
+				ver := strings.Fields(line)
+				if len(ver) != 2 || len(ver[1]) < 5 {
+
+					if ver[1] == "0" {
+						// Silently skip the CPU
+						continue
+					} else {
+						slog.Debug("malformed " + line)
+					}
+					res[i] = Version{
+						Major: 0,
+						Minor: 0,
+						Patch: 0,
+					}
+					continue
+				}
+				l := len(ver[1])
+				patch, err1 := strconv.ParseUint(ver[1][l-2:l], 10, 32)
+				minor, err2 := strconv.ParseUint(ver[1][l-4:l-2], 10, 32)
+				major, err3 := strconv.ParseUint(ver[1][:l-4], 10, 32)
+				if err1 != nil || err2 != nil || err3 != nil {
+					slog.Debug("malformed int " + line)
+					continue
+				}
+
+				res[i] = Version{
+					Major: uint(major),
+					Minor: uint(minor),
+					Patch: uint(patch),
+				}
+			}
+		}
+	}
+	return res
+}
+
+func (v Version) ToGFXString() string {
+	return fmt.Sprintf("gfx%d%d%d", v.Major, v.Minor, v.Patch)
+}
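
For reference, the sysfs `gfx_target_version` value parsed above packs the patch and minor versions into the last two digit pairs, with the remainder as the major version. Below is a standalone sketch of that decode; the sample value `100304` (a gfx1034 part such as the Radeon RX 5400 mentioned in the troubleshooting doc) is an assumption for illustration.

```go
package main

import (
	"fmt"
	"strconv"
)

func main() {
	// Assumed sysfs value: gfx_target_version 100304 -> major 10, minor 3, patch 4
	v := "100304"
	l := len(v)
	patch, _ := strconv.ParseUint(v[l-2:], 10, 32)
	minor, _ := strconv.ParseUint(v[l-4:l-2], 10, 32)
	major, _ := strconv.ParseUint(v[:l-4], 10, 32)
	fmt.Printf("gfx%d%d%d\n", major, minor, patch) // gfx1034
}
```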

+ 190 - 0
gpu/amd_windows.go

@@ -0,0 +1,190 @@
+package gpu
+
+import (
+	"bytes"
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"slices"
+	"strings"
+)
+
+const (
+	RocmStandardLocation = "C:\\Program Files\\AMD\\ROCm\\5.7\\bin" // TODO glob?
+
+	// TODO We're looking for this exact name to detect iGPUs since hipGetDeviceProperties never reports integrated==true
+	iGPUName = "AMD Radeon(TM) Graphics"
+)
+
+var (
+	// Used to validate if the given ROCm lib is usable
+	ROCmLibGlobs = []string{"hipblas.dll", "rocblas"} // TODO - probably include more coverage of files here...
+)
+
+func AMDGetGPUInfo(resp *GpuInfo) {
+	hl, err := NewHipLib()
+	if err != nil {
+		slog.Debug(err.Error())
+		return
+	}
+	defer hl.Release()
+	skip := map[int]interface{}{}
+	ids := []int{}
+	resp.memInfo.DeviceCount = 0
+	resp.memInfo.TotalMemory = 0
+	resp.memInfo.FreeMemory = 0
+
+	ver, err := hl.AMDDriverVersion()
+	if err == nil {
+		slog.Info("AMD Driver: " + ver)
+	} else {
+		// For now this is benign, but we may eventually need to fail compatibility checks
+		slog.Debug(fmt.Sprintf("error looking up amd driver version: %s", err))
+	}
+
+	// Note: the HIP library automatically handles HIP_VISIBLE_DEVICES
+	count := hl.HipGetDeviceCount()
+	if count == 0 {
+		return
+	}
+	libDir, err := AMDValidateLibDir()
+	if err != nil {
+		slog.Warn(fmt.Sprintf("unable to verify rocm library, will use cpu: %s", err))
+		return
+	}
+
+	var supported []string
+	gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+	if gfxOverride == "" {
+		supported, err = GetSupportedGFX(libDir)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("failed to lookup supported GFX types, falling back to CPU mode: %s", err))
+			return
+		}
+	} else {
+		slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
+	}
+
+	slog.Info(fmt.Sprintf("detected %d hip devices", count))
+	for i := 0; i < count; i++ {
+		ids = append(ids, i)
+		err = hl.HipSetDevice(i)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("[%d] %s", i, err))
+			skip[i] = struct{}{}
+			continue
+		}
+
+		props, err := hl.HipGetDeviceProperties(i)
+		if err != nil {
+			slog.Warn(fmt.Sprintf("[%d] %s", i, err))
+			skip[i] = struct{}{}
+			continue
+		}
+		n := bytes.IndexByte(props.Name[:], 0)
+		name := string(props.Name[:n])
+		slog.Info(fmt.Sprintf("[%d] Name: %s", i, name))
+		n = bytes.IndexByte(props.GcnArchName[:], 0)
+		gfx := string(props.GcnArchName[:n])
+		slog.Info(fmt.Sprintf("[%d] GcnArchName: %s", i, gfx))
+		//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY!  Always 0
+		// TODO  Why isn't props.iGPU accurate!?
+		if strings.EqualFold(name, iGPUName) {
+			slog.Info(fmt.Sprintf("iGPU detected [%d] skipping", i))
+			skip[i] = struct{}{}
+			continue
+		}
+		if gfxOverride == "" {
+			if !slices.Contains[[]string, string](supported, gfx) {
+				slog.Warn(fmt.Sprintf("amdgpu [%d] %s is not supported by %s %v", i, gfx, libDir, supported))
+				// TODO - consider discrete markdown just for ROCM troubleshooting?
+				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
+				skip[i] = struct{}{}
+				continue
+			} else {
+				slog.Info(fmt.Sprintf("amdgpu [%d] %s is supported", i, gfx))
+			}
+		}
+
+		totalMemory, freeMemory, err := hl.HipMemGetInfo()
+		if err != nil {
+			slog.Warn(fmt.Sprintf("[%d] %s", i, err))
+			continue
+		}
+
+		// TODO according to docs, freeMem may lie on windows!
+		slog.Info(fmt.Sprintf("[%d] Total Mem: %d", i, totalMemory))
+		slog.Info(fmt.Sprintf("[%d] Free Mem:  %d", i, freeMemory))
+		resp.memInfo.DeviceCount++
+		resp.memInfo.TotalMemory += totalMemory
+		resp.memInfo.FreeMemory += freeMemory
+	}
+	if resp.memInfo.DeviceCount > 0 {
+		resp.Library = "rocm"
+	}
+	// Abort if all GPUs are skipped
+	if len(skip) >= count {
+		slog.Info("all detected amdgpus are skipped, falling back to CPU")
+		return
+	}
+	if len(skip) > 0 {
+		amdSetVisibleDevices(ids, skip)
+	}
+	UpdatePath(libDir)
+}
+
+func AMDValidateLibDir() (string, error) {
+	// On windows non-admins typically can't create links
+	// so instead of trying to rely on rpath and a link in
+	// $LibDir/rocm, we instead rely on setting PATH to point
+	// to the location of the ROCm library
+
+	// Installer payload location
+	exe, err := os.Executable()
+	if err == nil {
+		rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
+		if rocmLibUsable(rocmTargetDir) {
+			slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
+			return rocmTargetDir, nil
+		}
+	}
+
+	// If we already have a rocm dependency wired, nothing more to do
+	libDir, err := AssetsDir()
+	if err != nil {
+		return "", fmt.Errorf("unable to lookup lib dir: %w", err)
+	}
+	rocmTargetDir := filepath.Join(libDir, "rocm")
+	if rocmLibUsable(rocmTargetDir) {
+		return rocmTargetDir, nil
+	}
+
+	// Prefer explicit HIP env var
+	hipPath := os.Getenv("HIP_PATH")
+	if hipPath != "" {
+		hipLibDir := filepath.Join(hipPath, "bin")
+		if rocmLibUsable(hipLibDir) {
+			slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
+			return hipLibDir, nil
+		}
+	}
+
+	// Well known location(s)
+	if rocmLibUsable(RocmStandardLocation) {
+		return RocmStandardLocation, nil
+	}
+
+	// Installer payload (if we're running from some other location)
+	localAppData := os.Getenv("LOCALAPPDATA")
+	appDir := filepath.Join(localAppData, "Programs", "Ollama")
+	rocmTargetDir = filepath.Join(appDir, "rocm")
+	if rocmLibUsable(rocmTargetDir) {
+		slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
+		return rocmTargetDir, nil
+	}
+
+	// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
+	slog.Warn("amdgpu detected, but no compatible rocm library found.  Please install ROCm v6")
+	return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
+}

+ 60 - 0
gpu/assets.go

@@ -0,0 +1,60 @@
+package gpu
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+
+	"github.com/jmorganca/ollama/version"
+)
+
+func AssetsDir() (string, error) {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return "", err
+	}
+	baseDir := filepath.Join(home, ".ollama", "assets")
+	libDirs, err := os.ReadDir(baseDir)
+	if err == nil {
+		for _, d := range libDirs {
+			if d.Name() == version.Version {
+				continue
+			}
+			// Special case the rocm dependencies, which are handled by the installer
+			if d.Name() == "rocm" {
+				continue
+			}
+			slog.Debug("stale lib detected, cleaning up " + d.Name())
+			err = os.RemoveAll(filepath.Join(baseDir, d.Name()))
+			if err != nil {
+				slog.Warn(fmt.Sprintf("unable to clean up stale library %s: %s", filepath.Join(baseDir, d.Name()), err))
+			}
+		}
+	}
+	return filepath.Join(baseDir, version.Version), nil
+}
+
+func UpdatePath(dir string) {
+	if runtime.GOOS == "windows" {
+		tmpDir := filepath.Dir(dir)
+		pathComponents := strings.Split(os.Getenv("PATH"), ";")
+		i := 0
+		for _, comp := range pathComponents {
+			if strings.EqualFold(comp, dir) {
+				return
+			}
+			// Remove any other prior paths to our temp dir
+			if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
+				pathComponents[i] = comp
+				i++
+			}
+		}
+		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
+		slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
+		os.Setenv("PATH", newPath)
+	}
+	// linux and darwin rely on rpath
+}
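
A small sketch (not part of this change) of how these helpers are meant to be called, mirroring the `gpu.UpdatePath` call in llm/dyn_ext_server.go; the printed path is whatever `AssetsDir` resolves for the current version.

```go
package main

import (
	"fmt"
	"log"

	"github.com/jmorganca/ollama/gpu"
)

func main() {
	dir, err := gpu.AssetsDir()
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("runtime payload dir:", dir) // ~/.ollama/assets/<version>
	// No-op on linux/darwin (rpath is used instead); on windows this prepends
	// dir to PATH and removes stale entries under the same parent directory.
	gpu.UpdatePath(dir)
}
```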

+ 5 - 105
gpu/gpu.go

@@ -24,7 +24,6 @@ import (
 
 type handles struct {
 	cuda *C.cuda_handle_t
-	rocm *C.rocm_handle_t
 }
 
 var gpuMutex sync.Mutex
@@ -54,39 +53,23 @@ var CudaWindowsGlobs = []string{
 	"c:\\Windows\\System32\\nvml.dll",
 }
 
-var RocmLinuxGlobs = []string{
-	"/opt/rocm*/lib*/librocm_smi64.so*",
-}
-
-var RocmWindowsGlobs = []string{
-	"c:\\Windows\\System32\\rocm_smi64.dll",
-}
-
 // Note: gpuMutex must already be held
 func initGPUHandles() {
 
 	// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
 
-	gpuHandles = &handles{nil, nil}
+	gpuHandles = &handles{nil}
 	var cudaMgmtName string
 	var cudaMgmtPatterns []string
-	var rocmMgmtName string
-	var rocmMgmtPatterns []string
 	switch runtime.GOOS {
 	case "windows":
 		cudaMgmtName = "nvml.dll"
 		cudaMgmtPatterns = make([]string, len(CudaWindowsGlobs))
 		copy(cudaMgmtPatterns, CudaWindowsGlobs)
-		rocmMgmtName = "rocm_smi64.dll"
-		rocmMgmtPatterns = make([]string, len(RocmWindowsGlobs))
-		copy(rocmMgmtPatterns, RocmWindowsGlobs)
 	case "linux":
 		cudaMgmtName = "libnvidia-ml.so"
 		cudaMgmtPatterns = make([]string, len(CudaLinuxGlobs))
 		copy(cudaMgmtPatterns, CudaLinuxGlobs)
-		rocmMgmtName = "librocm_smi64.so"
-		rocmMgmtPatterns = make([]string, len(RocmLinuxGlobs))
-		copy(rocmMgmtPatterns, RocmLinuxGlobs)
 	default:
 		return
 	}
@@ -101,16 +84,6 @@ func initGPUHandles() {
 			return
 		}
 	}
-
-	rocmLibPaths := FindGPULibs(rocmMgmtName, rocmMgmtPatterns)
-	if len(rocmLibPaths) > 0 {
-		rocm := LoadROCMMgmt(rocmLibPaths)
-		if rocm != nil {
-			slog.Info("Radeon GPU detected")
-			gpuHandles.rocm = rocm
-			return
-		}
-	}
 }
 
 func GetGPUInfo() GpuInfo {
@@ -149,66 +122,10 @@ func GetGPUInfo() GpuInfo {
 				slog.Info(fmt.Sprintf("CUDA GPU is too old. Falling back to CPU mode. Compute Capability detected: %d.%d", cc.major, cc.minor))
 			}
 		}
-	} else if AMDDetected() && gpuHandles.rocm != nil && (cpuVariant != "" || runtime.GOARCH != "amd64") {
-		ver, err := AMDDriverVersion()
-		if err == nil {
-			slog.Info("AMD Driver: " + ver)
-		} else {
-			// For now this is benign, but we may eventually need to fail compatibility checks
-			slog.Debug("error looking up amd driver version: %s", err)
-		}
-		gfx := AMDGFXVersions()
-		tooOld := false
-		for _, v := range gfx {
-			if v.Major < 9 {
-				slog.Info("AMD GPU too old, falling back to CPU " + v.ToGFXString())
-				tooOld = true
-				break
-			}
-
-			// TODO - remap gfx strings for unsupporetd minor/patch versions to supported for the same major
-			// e.g. gfx1034 works if we map it to gfx1030 at runtime
-
-		}
-		if !tooOld {
-			// TODO - this algo can be shifted over to use sysfs instead of the rocm info library...
-			C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
-			if memInfo.err != nil {
-				slog.Info(fmt.Sprintf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err)))
-				C.free(unsafe.Pointer(memInfo.err))
-			} else if memInfo.igpu_index >= 0 && memInfo.count == 1 {
-				// Only one GPU detected and it appears to be an integrated GPU - skip it
-				slog.Info("ROCm unsupported integrated GPU detected")
-			} else if memInfo.count > 0 {
-				if memInfo.igpu_index >= 0 {
-					// We have multiple GPUs reported, and one of them is an integrated GPU
-					// so we have to set the env var to bypass it
-					// If the user has specified their own ROCR_VISIBLE_DEVICES, don't clobber it
-					val := os.Getenv("ROCR_VISIBLE_DEVICES")
-					if val == "" {
-						devices := []string{}
-						for i := 0; i < int(memInfo.count); i++ {
-							if i == int(memInfo.igpu_index) {
-								continue
-							}
-							devices = append(devices, strconv.Itoa(i))
-						}
-						val = strings.Join(devices, ",")
-						os.Setenv("ROCR_VISIBLE_DEVICES", val)
-					}
-					slog.Info(fmt.Sprintf("ROCm integrated GPU detected - ROCR_VISIBLE_DEVICES=%s", val))
-				}
-				resp.Library = "rocm"
-				var version C.rocm_version_resp_t
-				C.rocm_get_version(*gpuHandles.rocm, &version)
-				verString := C.GoString(version.str)
-				if version.status == 0 {
-					resp.Variant = "v" + verString
-				} else {
-					slog.Info(fmt.Sprintf("failed to look up ROCm version: %s", verString))
-				}
-				C.free(unsafe.Pointer(version.str))
-			}
+	} else {
+		AMDGetGPUInfo(&resp)
+		if resp.Library != "" {
+			return resp
 		}
 	}
 	if resp.Library == "" {
@@ -338,23 +255,6 @@ func LoadCUDAMgmt(cudaLibPaths []string) *C.cuda_handle_t {
 	return nil
 }
 
-func LoadROCMMgmt(rocmLibPaths []string) *C.rocm_handle_t {
-	var resp C.rocm_init_resp_t
-	resp.rh.verbose = getVerboseState()
-	for _, libPath := range rocmLibPaths {
-		lib := C.CString(libPath)
-		defer C.free(unsafe.Pointer(lib))
-		C.rocm_init(lib, &resp)
-		if resp.err != nil {
-			slog.Info(fmt.Sprintf("Unable to load ROCm management library %s: %s", libPath, C.GoString(resp.err)))
-			C.free(unsafe.Pointer(resp.err))
-		} else {
-			return &resp.rh
-		}
-	}
-	return nil
-}
-
 func getVerboseState() C.uint16_t {
 	if debug := os.Getenv("OLLAMA_DEBUG"); debug != "" {
 		return C.uint16_t(1)

+ 0 - 1
gpu/gpu_info.h

@@ -53,7 +53,6 @@ void cpu_check_ram(mem_info_t *resp);
 #endif
 
 #include "gpu_info_cuda.h"
-#include "gpu_info_rocm.h"
 
 #endif  // __GPU_INFO_H__
 #endif  // __APPLE__

+ 5 - 5
gpu/gpu_info_cuda.c

@@ -124,31 +124,31 @@ void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
       // When in verbose mode, report more information about
       // the card we discover, but don't fail on error
       ret = (*h.nvmlDeviceGetName)(device, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
+      if (ret != NVML_SUCCESS) {
         LOG(h.verbose, "nvmlDeviceGetName failed: %d\n", ret);
       } else {
         LOG(h.verbose, "[%d] CUDA device name: %s\n", i, buf);
       }
       ret = (*h.nvmlDeviceGetBoardPartNumber)(device, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
+      if (ret != NVML_SUCCESS) {
         LOG(h.verbose, "nvmlDeviceGetBoardPartNumber failed: %d\n", ret);
       } else {
         LOG(h.verbose, "[%d] CUDA part number: %s\n", i, buf);
       }
       ret = (*h.nvmlDeviceGetSerial)(device, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
+      if (ret != NVML_SUCCESS) {
         LOG(h.verbose, "nvmlDeviceGetSerial failed: %d\n", ret);
       } else {
         LOG(h.verbose, "[%d] CUDA S/N: %s\n", i, buf);
       }
       ret = (*h.nvmlDeviceGetVbiosVersion)(device, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
+      if (ret != NVML_SUCCESS) {
         LOG(h.verbose, "nvmlDeviceGetVbiosVersion failed: %d\n", ret);
       } else {
         LOG(h.verbose, "[%d] CUDA vbios version: %s\n", i, buf);
       }
       ret = (*h.nvmlDeviceGetBrand)(device, &brand);
-      if (ret != RSMI_STATUS_SUCCESS) {
+      if (ret != NVML_SUCCESS) {
         LOG(h.verbose, "nvmlDeviceGetBrand failed: %d\n", ret);
       } else {
         LOG(h.verbose, "[%d] CUDA brand: %d\n", i, brand);

+ 0 - 198
gpu/gpu_info_rocm.c

@@ -1,198 +0,0 @@
-#ifndef __APPLE__
-
-#include "gpu_info_rocm.h"
-
-#include <string.h>
-
-void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp) {
-  rsmi_status_t ret;
-  resp->err = NULL;
-  const int buflen = 256;
-  char buf[buflen + 1];
-  int i;
-  struct lookup {
-    char *s;
-    void **p;
-  } l[] = {
-      {"rsmi_init", (void *)&resp->rh.rsmi_init},
-      {"rsmi_shut_down", (void *)&resp->rh.rsmi_shut_down},
-      {"rsmi_dev_memory_total_get", (void *)&resp->rh.rsmi_dev_memory_total_get},
-      {"rsmi_dev_memory_usage_get", (void *)&resp->rh.rsmi_dev_memory_usage_get},
-      {"rsmi_version_get", (void *)&resp->rh.rsmi_version_get},
-      {"rsmi_num_monitor_devices", (void*)&resp->rh.rsmi_num_monitor_devices},
-      {"rsmi_dev_id_get", (void*)&resp->rh.rsmi_dev_id_get},
-      {"rsmi_dev_name_get", (void *)&resp->rh.rsmi_dev_name_get},
-      {"rsmi_dev_brand_get", (void *)&resp->rh.rsmi_dev_brand_get},
-      {"rsmi_dev_vendor_name_get", (void *)&resp->rh.rsmi_dev_vendor_name_get},
-      {"rsmi_dev_vram_vendor_get", (void *)&resp->rh.rsmi_dev_vram_vendor_get},
-      {"rsmi_dev_serial_number_get", (void *)&resp->rh.rsmi_dev_serial_number_get},
-      {"rsmi_dev_subsystem_name_get", (void *)&resp->rh.rsmi_dev_subsystem_name_get},
-      {"rsmi_dev_vbios_version_get", (void *)&resp->rh.rsmi_dev_vbios_version_get},
-      {NULL, NULL},
-  };
-
-  resp->rh.handle = LOAD_LIBRARY(rocm_lib_path, RTLD_LAZY);
-  if (!resp->rh.handle) {
-    char *msg = LOAD_ERR();
-    snprintf(buf, buflen,
-             "Unable to load %s library to query for Radeon GPUs: %s\n",
-             rocm_lib_path, msg);
-    free(msg);
-    resp->err = strdup(buf);
-    return;
-  }
-
-  // TODO once we've squashed the remaining corner cases remove this log
-  LOG(resp->rh.verbose, "wiring rocm management library functions in %s\n", rocm_lib_path);
-
-  for (i = 0; l[i].s != NULL; i++) {
-    // TODO once we've squashed the remaining corner cases remove this log
-    LOG(resp->rh.verbose, "dlsym: %s\n", l[i].s);
-
-    *l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
-    if (!l[i].p) {
-      resp->rh.handle = NULL;
-      char *msg = LOAD_ERR();
-      LOG(resp->rh.verbose, "dlerr: %s\n", msg);
-      UNLOAD_LIBRARY(resp->rh.handle);
-      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
-               msg);
-      free(msg);
-      resp->err = strdup(buf);
-      return;
-    }
-  }
-
-  ret = (*resp->rh.rsmi_init)(0);
-  if (ret != RSMI_STATUS_SUCCESS) {
-    LOG(resp->rh.verbose, "rsmi_init err: %d\n", ret);
-    UNLOAD_LIBRARY(resp->rh.handle);
-    resp->rh.handle = NULL;
-    snprintf(buf, buflen, "rocm vram init failure: %d", ret);
-    resp->err = strdup(buf);
-  }
-
-  return;
-}
-
-void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
-  resp->err = NULL;
-  resp->igpu_index = -1;
-  uint64_t totalMem = 0;
-  uint64_t usedMem = 0;
-  rsmi_status_t ret;
-  const int buflen = 256;
-  char buf[buflen + 1];
-  int i;
-
-  if (h.handle == NULL) {
-    resp->err = strdup("rocm handle not initialized");
-    return;
-  }
-
-  ret = (*h.rsmi_num_monitor_devices)(&resp->count);
-  if (ret != RSMI_STATUS_SUCCESS) {
-    snprintf(buf, buflen, "unable to get device count: %d", ret);
-    resp->err = strdup(buf);
-    return;
-  }
-  LOG(h.verbose, "discovered %d ROCm GPU Devices\n", resp->count);
-
-  resp->total = 0;
-  resp->free = 0;
-  for (i = 0; i < resp->count; i++) {
-    if (h.verbose) {
-      // When in verbose mode, report more information about
-      // the card we discover, but don't fail on error
-      ret = (*h.rsmi_dev_name_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_name_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm device name: %s\n", i, buf);
-      }
-      ret = (*h.rsmi_dev_brand_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_brand_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm brand: %s\n", i, buf);
-      }
-      ret = (*h.rsmi_dev_vendor_name_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_vendor_name_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm vendor: %s\n", i, buf);
-      }
-      ret = (*h.rsmi_dev_vram_vendor_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_vram_vendor_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm VRAM vendor: %s\n", i, buf);
-      }
-      ret = (*h.rsmi_dev_serial_number_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_serial_number_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm S/N: %s\n", i, buf);
-      }
-      ret = (*h.rsmi_dev_subsystem_name_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_subsystem_name_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm subsystem name: %s\n", i, buf);
-      }
-      ret = (*h.rsmi_dev_vbios_version_get)(i, buf, buflen);
-      if (ret != RSMI_STATUS_SUCCESS) {
-        LOG(h.verbose, "rsmi_dev_vbios_version_get failed: %d\n", ret);
-      } else {
-        LOG(h.verbose, "[%d] ROCm vbios version: %s\n", i, buf);
-      }
-    }
-
-    // Get total memory - used memory for available memory
-    ret = (*h.rsmi_dev_memory_total_get)(i, RSMI_MEM_TYPE_VRAM, &totalMem);
-    if (ret != RSMI_STATUS_SUCCESS) {
-      snprintf(buf, buflen, "rocm total mem lookup failure: %d", ret);
-      resp->err = strdup(buf);
-      return;
-    }
-    ret = (*h.rsmi_dev_memory_usage_get)(i, RSMI_MEM_TYPE_VRAM, &usedMem);
-    if (ret != RSMI_STATUS_SUCCESS) {
-      snprintf(buf, buflen, "rocm usage mem lookup failure: %d", ret);
-      resp->err = strdup(buf);
-      return;
-    }
-    LOG(h.verbose, "[%d] ROCm totalMem %ld\n", i, totalMem);
-    LOG(h.verbose, "[%d] ROCm usedMem %ld\n", i, usedMem);
-    if (totalMem < 1024 * 1024 * 1024) {
-      // Do not add up integrated GPU memory capacity, it's a bogus 512M, and actually uses system memory
-      LOG(h.verbose, "[%d] ROCm integrated GPU\n", i);
-      resp->igpu_index = i;
-    } else {
-      resp->total += totalMem;
-      resp->free += totalMem - usedMem;
-    }
-  }
-}
-
-void rocm_get_version(rocm_handle_t h, rocm_version_resp_t *resp) {
-  const int buflen = 256;
-  char buf[buflen + 1];
-  if (h.handle == NULL) {
-    resp->str = strdup("rocm handle not initialized");
-    resp->status = 1;
-    return;
-  }
-  rsmi_version_t ver;
-  rsmi_status_t ret;
-  ret = h.rsmi_version_get(&ver);
-  if (ret != RSMI_STATUS_SUCCESS) {
-    snprintf(buf, buflen, "unexpected response on version lookup %d", ret);
-    resp->status = 1;
-  } else {
-    snprintf(buf, buflen, "%d", ver.major);
-    resp->status = 0;
-  }
-  resp->str = strdup(buf);
-}
-
-#endif  // __APPLE__

+ 0 - 59
gpu/gpu_info_rocm.h

@@ -1,59 +0,0 @@
-#ifndef __APPLE__
-#ifndef __GPU_INFO_ROCM_H__
-#define __GPU_INFO_ROCM_H__
-#include "gpu_info.h"
-
-// Just enough typedef's to dlopen/dlsym for memory information
-typedef enum rsmi_status_return {
-  RSMI_STATUS_SUCCESS = 0,
-  // Other values omitted for now...
-} rsmi_status_t;
-
-typedef enum rsmi_memory_type {
-  RSMI_MEM_TYPE_VRAM = 0,
-  RSMI_MEM_TYPE_VIS_VRAM,
-  RSMI_MEM_TYPE_GTT,
-} rsmi_memory_type_t;
-
- typedef struct {
-     uint32_t major;     
-     uint32_t minor;     
-     uint32_t patch;     
-     const char *build;  
- } rsmi_version_t;
-
-typedef struct rocm_handle {
-  void *handle;
-  uint16_t verbose;
-  rsmi_status_t (*rsmi_init)(uint64_t);
-  rsmi_status_t (*rsmi_shut_down)(void);
-  rsmi_status_t (*rsmi_dev_memory_total_get)(uint32_t, rsmi_memory_type_t, uint64_t *);
-  rsmi_status_t (*rsmi_dev_memory_usage_get)(uint32_t, rsmi_memory_type_t, uint64_t *);
-  rsmi_status_t (*rsmi_version_get) (rsmi_version_t *version);
-  rsmi_status_t (*rsmi_num_monitor_devices) (uint32_t *);
-  rsmi_status_t (*rsmi_dev_id_get)(uint32_t, uint16_t *);
-  rsmi_status_t (*rsmi_dev_name_get) (uint32_t,char *,size_t);
-  rsmi_status_t (*rsmi_dev_brand_get) (uint32_t, char *, uint32_t);		
-  rsmi_status_t (*rsmi_dev_vendor_name_get) (uint32_t, char *, uint32_t);		
-  rsmi_status_t (*rsmi_dev_vram_vendor_get) (uint32_t, char *, uint32_t);		
-  rsmi_status_t (*rsmi_dev_serial_number_get) (uint32_t, char *, uint32_t);		
-  rsmi_status_t (*rsmi_dev_subsystem_name_get) (uint32_t, char *, uint32_t);		
-  rsmi_status_t (*rsmi_dev_vbios_version_get) (uint32_t, char *, uint32_t);		
-} rocm_handle_t;
-
-typedef struct rocm_init_resp {
-  char *err;  // If err is non-null handle is invalid
-  rocm_handle_t rh;
-} rocm_init_resp_t;
-
-typedef struct rocm_version_resp {
-  rsmi_status_t status;
-  char *str; // Contains version or error string if status != 0 
-} rocm_version_resp_t;
-
-void rocm_init(char *rocm_lib_path, rocm_init_resp_t *resp);
-void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
-void rocm_get_version(rocm_handle_t rh, rocm_version_resp_t *resp);
-
-#endif  // __GPU_INFO_ROCM_H__
-#endif  // __APPLE__

+ 8 - 11
llm/dyn_ext_server.c

@@ -14,17 +14,14 @@
 #define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
 #define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
 #define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
-inline char *LOAD_ERR() {
-  LPSTR messageBuffer = NULL;
-  size_t size = FormatMessageA(
-      FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
-          FORMAT_MESSAGE_IGNORE_INSERTS,
-      NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-      (LPSTR)&messageBuffer, 0, NULL);
-  char *resp = strdup(messageBuffer);
-  LocalFree(messageBuffer);
-  return resp;
-}
+#define LOAD_ERR() ({\
+  LPSTR messageBuffer = NULL; \
+  size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, \
+                                 NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); \
+  char *resp = strdup(messageBuffer); \
+  LocalFree(messageBuffer); \
+  resp; \
+})
 #else
 #include <dlfcn.h>
 #define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)

+ 3 - 24
llm/dyn_ext_server.go

@@ -28,13 +28,13 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
-	"runtime"
 	"strings"
 	"sync"
 	"time"
 	"unsafe"
 
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/gpu"
 )
 
 type dynExtServer struct {
@@ -72,7 +72,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
 		slog.Info("concurrent llm servers not yet supported, waiting for prior server to complete")
 		mutex.Lock()
 	}
-	updatePath(filepath.Dir(library))
+	gpu.UpdatePath(filepath.Dir(library))
 	libPath := C.CString(library)
 	defer C.free(unsafe.Pointer(libPath))
 	resp := newExtServerResp(512)
@@ -148,6 +148,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
 	}
 
 	slog.Info("Initializing llama server")
+	slog.Debug(fmt.Sprintf("server params: %+v", sparams))
 	initResp := newExtServerResp(128)
 	defer freeExtServerResp(initResp)
 	C.dyn_llama_server_init(llm.s, &sparams, &initResp)
@@ -365,25 +366,3 @@ func (llm *dynExtServer) Close() {
 	C.dyn_llama_server_stop(llm.s)
 	mutex.Unlock()
 }
-
-func updatePath(dir string) {
-	if runtime.GOOS == "windows" {
-		tmpDir := filepath.Dir(dir)
-		pathComponents := strings.Split(os.Getenv("PATH"), ";")
-		i := 0
-		for _, comp := range pathComponents {
-			if strings.EqualFold(comp, dir) {
-				return
-			}
-			// Remove any other prior paths to our temp dir
-			if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
-				pathComponents[i] = comp
-				i++
-			}
-		}
-		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
-		slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
-		os.Setenv("PATH", newPath)
-	}
-	// linux and darwin rely on rpath
-}

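The Windows PATH bookkeeping that previously lived in this file moves into the gpu package; the call site above now goes through gpu.UpdatePath. A minimal Go sketch of that helper, assuming it keeps the behavior of the updatePath function removed here (the real gpu.UpdatePath body is not part of this hunk):

    // Sketch only: gpu.UpdatePath as implied by the removed updatePath above.
    package gpu

    import (
        "fmt"
        "log/slog"
        "os"
        "path/filepath"
        "runtime"
        "strings"
    )

    func UpdatePath(dir string) {
        if runtime.GOOS != "windows" {
            return // linux and darwin rely on rpath
        }
        tmpDir := filepath.Dir(dir)
        kept := []string{}
        for _, comp := range strings.Split(os.Getenv("PATH"), ";") {
            if strings.EqualFold(comp, dir) {
                return // library dir is already on PATH
            }
            // Drop stale entries that point into our temp dir
            if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
                kept = append(kept, comp)
            }
        }
        newPath := strings.Join(append([]string{dir}, kept...), ";")
        slog.Info(fmt.Sprintf("Updating PATH to %s", newPath))
        os.Setenv("PATH", newPath)
    }
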
+ 9 - 5
llm/generate/gen_linux.sh

@@ -179,17 +179,21 @@ fi
 
 if [ -d "${ROCM_PATH}" ]; then
     echo "ROCm libraries detected - building dynamic ROCm library"
-    if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
-        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
+    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
+        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
     fi
     init_vars
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
     BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
-    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
+    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
     build
 
-    # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
-    #       them being present at runtime on the host
+    # Record the ROCM dependencies
+    rm -f "${BUILD_DIR}/lib/deps.txt"
+    touch "${BUILD_DIR}/lib/deps.txt"
+    for dep in $(ldd "${BUILD_DIR}/lib/libext_server.so" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
+        echo "${dep}" >> "${BUILD_DIR}/lib/deps.txt"
+    done
     compress_libs
 fi
 

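Instead of assuming the ROCm runtime is installed on the host, the build now records the ext_server's ROCm-related shared-library dependencies (ldd output filtered for rocm, amdgpu and libtinfo) in deps.txt next to the built library so a later packaging step can bundle them. A hypothetical Go sketch of consuming such a manifest, assuming one absolute library path per line; the helper name and destination directory are illustrative only and not part of this change:

    // Hypothetical consumer of deps.txt: copy each listed library into a
    // bundle directory. Names and paths here are illustrative, not from the PR.
    package main

    import (
        "bufio"
        "io"
        "log"
        "os"
        "path/filepath"
    )

    func bundleDeps(manifest, destDir string) error {
        f, err := os.Open(manifest)
        if err != nil {
            return err
        }
        defer f.Close()
        if err := os.MkdirAll(destDir, 0o755); err != nil {
            return err
        }
        sc := bufio.NewScanner(f)
        for sc.Scan() {
            dep := sc.Text()
            if dep == "" {
                continue
            }
            src, err := os.Open(dep)
            if err != nil {
                return err
            }
            dst, err := os.Create(filepath.Join(destDir, filepath.Base(dep)))
            if err != nil {
                src.Close()
                return err
            }
            _, err = io.Copy(dst, src)
            src.Close()
            dst.Close()
            if err != nil {
                return err
            }
        }
        return sc.Err()
    }

    func main() {
        if err := bundleDeps("deps.txt", "dist/deps"); err != nil {
            log.Fatal(err)
        }
    }
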
+ 78 - 12
llm/generate/gen_windows.ps1

@@ -2,19 +2,52 @@
 
 $ErrorActionPreference = "Stop"
 
+function amdGPUs {
+    if ($env:AMDGPU_TARGETS) {
+        return $env:AMDGPU_TARGETS
+    }
+    # TODO - load from some common data file for linux + windows build consistency
+    $GPU_LIST = @(
+        "gfx900"
+        "gfx906:xnack-"
+        "gfx908:xnack-"
+        "gfx90a:xnack+"
+        "gfx90a:xnack-"
+        "gfx1010"
+        "gfx1012"
+        "gfx1030"
+        "gfx1100"
+        "gfx1101"
+        "gfx1102"
+    )
+    $GPU_LIST -join ';'
+}
+
 function init_vars {
+    # Verify the environment is a Developer Shell for MSVC 2019
+    write-host $env:VSINSTALLDIR
+    if (($env:VSINSTALLDIR -eq $null)) {
+        Write-Error "`r`nBUILD ERROR - YOUR DEVELOPMENT ENVIRONMENT IS NOT SET UP CORRECTLY`r`nTo build Ollama you must run from an MSVC Developer Shell`r`nSee .\docs\development.md for instructions to set up your dev environment"
+        exit 1
+    }
     $script:SRC_DIR = $(resolve-path "..\..\")
     $script:llamacppDir = "../llama.cpp"
-    $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off",  "-A", "x64")
+    $script:cmakeDefs = @(
+        "-DBUILD_SHARED_LIBS=on",
+        "-DLLAMA_NATIVE=off"
+        )
     $script:cmakeTargets = @("ext_server")
     $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
-        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
+        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
         $script:config = "RelWithDebInfo"
     } else {
-        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
+        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
         $script:config = "Release"
     }
+    if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
+        $script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
+    }
     # Try to find the CUDA dir
     if ($env:CUDA_LIB_DIR -eq $null) {
         $d=(get-command -ea 'silentlycontinue' nvcc).path
@@ -157,7 +190,7 @@ apply_patches
 $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
 
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
 write-host "Building LCD CPU"
 build
@@ -166,7 +199,7 @@ sign
 compress_libs
 
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
 write-host "Building AVX CPU"
 build
@@ -175,7 +208,7 @@ sign
 compress_libs
 
 init_vars
-$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
 $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
 write-host "Building AVX2 CPU"
 build
@@ -192,18 +225,51 @@ if ($null -ne $script:CUDA_LIB_DIR) {
     }
     init_vars
     $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-    $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    write-host "Building CUDA"
     build
     install
     sign
     compress_libs
 }
-# TODO - actually implement ROCm support on windows
-$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
 
-rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
-md "${script:buildDir}/lib" -ea 0 > $null
-echo $null >> "${script:buildDir}/lib/.generated"
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
+    }
+
+    init_vars
+    $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
+    $script:cmakeDefs += @(
+        "-G", "Ninja", 
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+        )
+
+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:VSINSTALLDIR\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja;$env:PATH"
+
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+
+    write-host "Building ROCm"
+    build
+    # Ninja doesn't prefix with config name
+    ${script:config}=""
+    install
+    if ($null -ne $script:DUMPBIN) {
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
+    }
+    sign
+    compress_libs
+}
 
 cleanup
 write-host "`ngo generate completed"

+ 6 - 6
llm/llm.go

@@ -19,7 +19,7 @@ type LLM interface {
 	Close()
 }
 
-func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func New(model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -120,15 +120,15 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 
 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
-	return newLlmServer(info, workDir, model, adapters, projectors, opts)
+	return newLlmServer(info, model, adapters, projectors, opts)
 }
 
 // Give any native cgo implementations an opportunity to initialize
-func Init(workdir string) error {
-	return nativeInit(workdir)
+func Init() error {
+	return nativeInit()
 }
 
-func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
+func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	dynLibs := getDynLibs(gpuInfo)
 
 	// Check to see if the user has requested a specific library instead of auto-detecting
@@ -147,7 +147,7 @@ func newLlmServer(gpuInfo gpu.GpuInfo, workDir, model string, adapters, projecto
 	_, err := os.Stat(dynLibs[0])
 	if err != nil {
 		slog.Info(fmt.Sprintf("%s has disappeared, reloading libraries", dynLibs[0]))
-		err = nativeInit(workDir)
+		err = nativeInit()
 		if err != nil {
 			return nil, err
 		}

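With the workDir parameter gone, callers initialize the payload store once and then create runners from just the model path. An illustrative sketch of the updated call order using the signatures shown in this diff; the model path is a placeholder:

    // Illustrative call order only; the model path is a placeholder.
    package main

    import (
        "log"

        "github.com/jmorganca/ollama/api"
        "github.com/jmorganca/ollama/llm"
    )

    func main() {
        // Extract the embedded server libraries once at startup; no scratch
        // workDir is passed anymore, the destination comes from the gpu package.
        if err := llm.Init(); err != nil {
            log.Fatalf("unable to initialize llm library: %v", err)
        }

        runner, err := llm.New("/path/to/model.gguf", nil, nil, api.DefaultOptions())
        if err != nil {
            log.Fatal(err)
        }
        defer runner.Close()
    }
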
+ 27 - 31
llm/payload_common.go

@@ -103,10 +103,14 @@ func rocmDynLibPresent() bool {
 	return false
 }
 
-func nativeInit(workdir string) error {
+func nativeInit() error {
 	slog.Info("Extracting dynamic libraries...")
+	assetsDir, err := gpu.AssetsDir()
+	if err != nil {
+		return err
+	}
 	if runtime.GOOS == "darwin" {
-		err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
+		err := extractPayloadFiles(assetsDir, "llama.cpp/ggml-metal.metal")
 		if err != nil {
 			if err == payloadMissing {
 				// TODO perhaps consider this a hard failure on arm macs?
@@ -115,10 +119,10 @@ func nativeInit(workdir string) error {
 			}
 			return err
 		}
-		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
+		os.Setenv("GGML_METAL_PATH_RESOURCES", assetsDir)
 	}
 
-	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
+	libs, err := extractDynamicLibs(assetsDir, "llama.cpp/build/*/*/*/lib/*")
 	if err != nil {
 		if err == payloadMissing {
 			slog.Info(fmt.Sprintf("%s", payloadMissing))
@@ -149,17 +153,13 @@ func nativeInit(workdir string) error {
 	return nil
 }
 
-func extractDynamicLibs(workDir, glob string) ([]string, error) {
+func extractDynamicLibs(assetsDir, glob string) ([]string, error) {
 	files, err := fs.Glob(libEmbed, glob)
 	if err != nil || len(files) == 0 {
 		return nil, payloadMissing
 	}
 	libs := []string{}
 
-	// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
-	// and tracking by version so we don't reexpand the files every time
-	// Also maybe consider lazy loading only what is needed
-
 	g := new(errgroup.Group)
 	for _, file := range files {
 		pathComps := strings.Split(file, "/")
@@ -172,14 +172,14 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 		g.Go(func() error {
 			// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
 			// Include the variant in the path to avoid conflicts between multiple server libs
-			targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
+			targetDir := filepath.Join(assetsDir, pathComps[pathComponentCount-3])
 			srcFile, err := libEmbed.Open(file)
 			if err != nil {
 				return fmt.Errorf("read payload %s: %v", file, err)
 			}
 			defer srcFile.Close()
 			if err := os.MkdirAll(targetDir, 0o755); err != nil {
-				return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+				return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
 			}
 			src := io.Reader(srcFile)
 			filename := file
@@ -196,19 +196,13 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 				libs = append(libs, destFile)
 			}
 
-			_, err = os.Stat(destFile)
-			switch {
-			case errors.Is(err, os.ErrNotExist):
-				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-				if err != nil {
-					return fmt.Errorf("write payload %s: %v", file, err)
-				}
-				defer destFile.Close()
-				if _, err := io.Copy(destFile, src); err != nil {
-					return fmt.Errorf("copy payload %s: %v", file, err)
-				}
-			case err != nil:
-				return fmt.Errorf("stat payload %s: %v", file, err)
+			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+			if err != nil {
+				return fmt.Errorf("write payload %s: %v", file, err)
+			}
+			defer destFp.Close()
+			if _, err := io.Copy(destFp, src); err != nil {
+				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 			return nil
 		})
@@ -216,7 +210,7 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 	return libs, g.Wait()
 }
 
-func extractPayloadFiles(workDir, glob string) error {
+func extractPayloadFiles(assetsDir, glob string) error {
 	files, err := fs.Glob(libEmbed, glob)
 	if err != nil || len(files) == 0 {
 		return payloadMissing
@@ -228,8 +222,8 @@ func extractPayloadFiles(workDir, glob string) error {
 			return fmt.Errorf("read payload %s: %v", file, err)
 		}
 		defer srcFile.Close()
-		if err := os.MkdirAll(workDir, 0o755); err != nil {
-			return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
+		if err := os.MkdirAll(assetsDir, 0o755); err != nil {
+			return fmt.Errorf("create payload lib dir %s: %v", assetsDir, err)
 		}
 		src := io.Reader(srcFile)
 		filename := file
@@ -241,20 +235,22 @@ func extractPayloadFiles(workDir, glob string) error {
 			filename = strings.TrimSuffix(filename, ".gz")
 		}
 
-		destFile := filepath.Join(workDir, filepath.Base(filename))
+		destFile := filepath.Join(assetsDir, filepath.Base(filename))
 		_, err = os.Stat(destFile)
 		switch {
 		case errors.Is(err, os.ErrNotExist):
-			destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+			destFp, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
 			if err != nil {
 				return fmt.Errorf("write payload %s: %v", file, err)
 			}
-			defer destFile.Close()
-			if _, err := io.Copy(destFile, src); err != nil {
+			defer destFp.Close()
+			if _, err := io.Copy(destFp, src); err != nil {
 				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 		case err != nil:
 			return fmt.Errorf("stat payload %s: %v", file, err)
+		case err == nil:
+			slog.Debug("payload already exists: " + destFile)
 		}
 	}
 	return nil

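Payload extraction now targets a persistent assets directory returned by gpu.AssetsDir() and keys each server library by its build variant, taken from the third-from-last component of the embedded path. A small sketch of that derivation, assuming the llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY layout noted in the hunk above; the assets path in the example is hypothetical:

    // Sketch of the variant-directory derivation used by extractDynamicLibs.
    package main

    import (
        "fmt"
        "path/filepath"
        "strings"
    )

    // embeddedPath is expected to look like
    // llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY (7 components).
    func targetDirFor(assetsDir, embeddedPath string) (string, error) {
        comps := strings.Split(embeddedPath, "/")
        if len(comps) != 7 {
            return "", fmt.Errorf("unexpected payload path: %s", embeddedPath)
        }
        // comps[len(comps)-3] is the build variant, e.g. "cpu_avx2"
        return filepath.Join(assetsDir, comps[len(comps)-3]), nil
    }

    func main() {
        dir, _ := targetDirFor("/tmp/ollama/assets", // hypothetical assets dir
            "llama.cpp/build/linux/x86_64/cpu_avx2/lib/libext_server.so.gz")
        fmt.Println(dir) // /tmp/ollama/assets/cpu_avx2
    }
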
+ 1 - 1
llm/payload_linux.go

@@ -4,5 +4,5 @@ import (
 	"embed"
 )
 
-//go:embed llama.cpp/build/linux/*/*/lib/*.so*
+//go:embed llama.cpp/build/linux/*/*/lib/*
 var libEmbed embed.FS

+ 1 - 0
scripts/build_linux.sh

@@ -22,5 +22,6 @@ for TARGETARCH in ${BUILD_ARCH}; do
         .
     docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
     docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
+    docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/dist/deps/ ./dist/
     docker rm builder-$TARGETARCH
 done

+ 2 - 4
server/routes.go

@@ -66,8 +66,6 @@ var defaultSessionDuration = 5 * time.Minute
 
 // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
 func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
-	workDir := c.GetString("workDir")
-
 	needLoad := loaded.runner == nil || // is there a model loaded?
 		loaded.ModelPath != model.ModelPath || // has the base model changed?
 		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
@@ -82,7 +80,7 @@ func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.D
 			loaded.Options = nil
 		}
 
-		llmRunner, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
+		llmRunner, err := llm.New(model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
 		if err != nil {
 			// some older models are not compatible with newer versions of llama.cpp
 			// show a generalized compatibility error until there is a better way to
@@ -1035,7 +1033,7 @@ func Serve(ln net.Listener) error {
 		os.Exit(0)
 	}()
 
-	if err := llm.Init(s.WorkDir); err != nil {
+	if err := llm.Init(); err != nil {
 		return fmt.Errorf("unable to initialize llm library %w", err)
 	}
 	if runtime.GOOS == "linux" { // TODO - windows too