Quellcode durchsuchen

Update ROCm (6.3 linux, 6.2 windows) and CUDA v12.8 (#9304)

* Bump cuda and rocm versions

Update ROCm to linux:6.3 win:6.2 and CUDA v12 to 12.8.
Yum has some silent failure modes, so largely switch to dnf.

* Fix windows build script
Daniel Hiltgen vor 2 Monaten
Ursprung
Commit
e91ae3d47d
6 geänderte Dateien mit 105 neuen und 65 gelöschten Zeilen
  1. +4 −4   .github/workflows/release.yaml
  2. +1 −1   .github/workflows/test.yaml
  3. +17 −13  Dockerfile
  4. +1 −1   scripts/build_docker.sh
  5. +29 −3   scripts/build_linux.sh
  6. +53 −43  scripts/build_windows.ps1

+ 4 - 4
.github/workflows/release.yaml

@@ -111,13 +111,13 @@ jobs:
          - os: windows
            arch: amd64
            preset: 'CUDA 12'
-            install: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
-            cuda-version: '12.4'
+            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
+            cuda-version: '12.8'
          - os: windows
            arch: amd64
            preset: 'ROCm 6'
-            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
-            rocm-version: '6.1'
+            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
+            rocm-version: '6.2'
    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    env:

+ 1 - 1
.github/workflows/test.yaml

@@ -81,7 +81,7 @@ jobs:
            install: https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe
            flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
          - preset: ROCm
-            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
+            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
            flags: '-DAMDGPU_TARGETS=gfx1010'
    runs-on: windows
    steps:

+ 17 - 13
Dockerfile

@@ -4,20 +4,22 @@ ARG FLAVOR=${TARGETARCH}
 
 ARG ROCMVERSION=6.3.3
 ARG JETPACK5VERSION=r35.4.1
-ARG JETPACK6VERSION=r36.2.0
+ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
 
+# CUDA v11 requires gcc v10.  v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
-RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
-    && yum install -y yum-utils gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ \
-    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
-    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
-ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
-
-FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
+RUN yum install -y yum-utils \
+    && yum-config-manager --add-repo https://dl.rockylinux.org/vault/rocky/8.5/AppStream/\$basearch/os/ \
+    && rpm --import https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-8 \
+    && dnf install -y yum-utils ccache gcc-toolset-10-gcc-10.2.1-8.2.el8 gcc-toolset-10-gcc-c++-10.2.1-8.2.el8 \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
+ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+
+FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
 RUN yum install -y yum-utils epel-release \
-    && yum install -y clang ccache \
+    && dnf install -y clang ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
 ENV CC=clang CXX=clang++
 
@@ -29,7 +31,8 @@ COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 ENV LDFLAGS=-s
 
 FROM base AS cpu
-# amd64 uses gcc which requires gcc-toolset-11 for AVX extensions while arm64 uses clang
+RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
+ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CPU' \
         && cmake --build --parallel --preset 'CPU' \
@@ -37,7 +40,7 @@ RUN --mount=type=cache,target=/root/.ccache \
 
 FROM base AS cuda-11
 ARG CUDA11VERSION=11.3
-RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
+RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
 ENV PATH=/usr/local/cuda-11/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 11' \
@@ -45,8 +48,8 @@ RUN --mount=type=cache,target=/root/.ccache \
         && cmake --install build --component CUDA --strip --parallel 8
 
 FROM base AS cuda-12
-ARG CUDA12VERSION=12.4
-RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
+ARG CUDA12VERSION=12.8
+RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
 ENV PATH=/usr/local/cuda-12/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 12' \
@@ -54,6 +57,7 @@ RUN --mount=type=cache,target=/root/.ccache \
         && cmake --install build --component CUDA --strip --parallel 8
 
 FROM base AS rocm-6
+ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'ROCm 6' \
         && cmake --build --parallel --preset 'ROCm 6' \

+ 1 - 1
scripts/build_docker.sh

@@ -28,7 +28,7 @@ if echo $PLATFORM | grep "amd64" > /dev/null; then
         ${LOAD_OR_PUSH} \
         --platform=linux/amd64 \
         ${OLLAMA_COMMON_BUILD_ARGS} \
-        --target runtime-rocm \
+        --build-arg FLAVOR=rocm \
         -f Dockerfile \
         -t ${FINAL_IMAGE_REPO}:$VERSION-rocm \
         .

+ 29 - 3
scripts/build_linux.sh

@@ -22,8 +22,34 @@ docker buildx build \
         -f Dockerfile \
         .
 
+if echo $PLATFORM | grep "amd64" > /dev/null; then
+    outDir="./dist"
+    if echo $PLATFORM | grep "," > /dev/null ; then
+       outDir="./dist/linux_amd64"
+    fi
+    docker buildx build \
+        --output type=local,dest=${outDir} \
+        --platform=linux/amd64 \
+        ${OLLAMA_COMMON_BUILD_ARGS} \
+        --build-arg FLAVOR=rocm \
+        --target archive \
+        -f Dockerfile \
+        .
+fi
+
 # buildx behavior changes for single vs. multiplatform
-if echo $PLATFORM | grep "," > /dev/null ; then 
-        mv -f ./dist/linux_*64/ollama* ./dist/
-        rmdir ./dist/linux_*64
+echo "Compressing linux tar bundles..."
+if echo $PLATFORM | grep "," > /dev/null ; then
+        tar c -C ./dist/linux_arm64 --exclude cuda_jetpack5 --exclude cuda_jetpack6 . | pigz -9vc >./dist/ollama-linux-arm64.tgz
+        tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack5  | pigz -9vc >./dist/ollama-linux-arm64-jetpack5.tgz
+        tar c -C ./dist/linux_arm64 ./lib/ollama/cuda_jetpack6  | pigz -9vc >./dist/ollama-linux-arm64-jetpack6.tgz
+        tar c -C ./dist/linux_amd64 --exclude rocm . | pigz -9vc >./dist/ollama-linux-amd64.tgz
+        tar c -C ./dist/linux_amd64 ./lib/ollama/rocm  | pigz -9vc >./dist/ollama-linux-amd64-rocm.tgz
+elif echo $PLATFORM | grep "arm64" > /dev/null ; then
+        tar c -C ./dist/ --exclude cuda_jetpack5 --exclude cuda_jetpack6 bin lib | pigz -9vc >./dist/ollama-linux-arm64.tgz
+        tar c -C ./dist/ ./lib/ollama/cuda_jetpack5  | pigz -9vc >./dist/ollama-linux-arm64-jetpack5.tgz
+        tar c -C ./dist/ ./lib/ollama/cuda_jetpack6  | pigz -9vc >./dist/ollama-linux-arm64-jetpack6.tgz
+elif echo $PLATFORM | grep "amd64" > /dev/null ; then
+        tar c -C ./dist/ --exclude rocm bin lib | pigz -9vc >./dist/ollama-linux-amd64.tgz
+        tar c -C ./dist/ ./lib/ollama/rocm  | pigz -9vc >./dist/ollama-linux-amd64-rocm.tgz
 fi

+ 53 - 43
scripts/build_windows.ps1

@@ -26,6 +26,9 @@ function checkEnv() {
         $MSVC_INSTALL=(Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation
         $env:VCToolsRedistDir=(get-item "${MSVC_INSTALL}\VC\Redist\MSVC\*")[0]
     }
+    if (-Not (get-command -ErrorAction silent ninja)) {
+        $script:NINJA_DIR=(gci -path (Get-CimInstance MSFT_VSInstance -Namespace root/cimv2/vs)[0].InstallLocation -r -fi ninja.exe) | split-path -parent
+    }
     # Locate CUDA versions
     # Note: this assumes every version found will be built
     $cudaList=(get-item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\" -ea 'silentlycontinue')
@@ -75,6 +78,7 @@ function checkEnv() {
     } else {
         write-host "Code signing disabled - please set KEY_CONTAINERS to sign and copy ollama_inc.crt to the top of the source tree"
     }
+    $script:JOBS=((Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors)
 }
 
 
@@ -83,51 +87,57 @@ function buildOllama() {
         Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
         New-Item "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ItemType Directory -ea 0
 
-
-        # Default first, then conditionall ROCm and cuda v11
-        write-host "Building Default native backend libraries"
-         $env:CMAKE_GENERATOR="ninja"
-        & cmake --preset Default
+        & cmake --fresh --preset CPU --install-prefix $script:DIST_DIR
         if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        & cmake --build --preset Default -j 12
+        & cmake --build --preset CPU --parallel $script:JOBS
         if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        & cmake --install build -j 12
-        
-        # TODO - add steps for v11 and ROCm
-        #
-        # if ("$script:CUDA_DIRS".Contains("v11") -and "$script:CUDA_DIRS".Contains("v12")) {
-        #     # We assume the default is v12, so override for v11
-        #     $origCUDA_PATH=$env:CUDA_PATH
-        #     $hashEnv = @{}
-        #     Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
-        #     $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
-        #     write-host "$v11"
-        #     # $env:CUDA_PATH=$hashEnv[$v11]
-        #     # $env:CUDACXX=$hashEnv[$v11]+"\bin\nvcc.exe"
-        #     $env:CUDAToolkit_ROOT=$hashEnv[$v11]
-        #     # ls env:
-        #     write-host "Building CUDA v11 backend libraries"
-        #     & cmake --preset "CUDA 11"
-        #     $env:CUDA_PATH=$origCUDA_PATH
-        #     exit(1)
-        #     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        #     # & cmake --build --preset "CUDA 11" -j 12
-        #     # if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        # }
-
-        # if ($env:HIP_PATH) {
-        #     write-host "Building ROCm backend libraries"
-        #     $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
-        #     $env:HIP_PLATFORM="amd"
-        #     $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
-        #     & cmake --preset "ROCm"
-        #     $env:HIPCXX=""
-        #     $env:HIP_PLATFORM=""
-        #     $env:CMAKE_PREFIX_PATH=""
-        #     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        #     & cmake --build --preset "ROCm" -j 12
-        #     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-        # }
+        & cmake --install build --component CPU --strip
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+
+        $hashEnv = @{}
+        Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
+        if ("$script:CUDA_DIRS".Contains("v11")) {
+            $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $v11="$_" }}
+            $env:CUDAToolkit_ROOT=$hashEnv[$v11]
+            write-host "Building CUDA v11 backend libraries"
+            # Note: cuda v11 requires msvc 2019 so force the older generator
+            # to avoid 2022 (or newer) from being used as the default
+            & cmake --fresh --preset "CUDA 11" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+            & cmake --build --preset "CUDA 11" --parallel $script:JOBS
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+            & cmake --install build --component "CUDA" --strip
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        }
+        if ("$script:CUDA_DIRS".Contains("v12")) {
+            $hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
+            $env:CUDAToolkit_ROOT=$hashEnv[$v12]
+            write-host "Building CUDA v12 backend libraries"
+            & cmake --fresh --preset "CUDA 12" --install-prefix $script:DIST_DIR
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+            & cmake --build --preset "CUDA 12" --parallel $script:JOBS
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+            & cmake --install build --component "CUDA" --strip
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        }
+        if ($env:HIP_PATH) {
+            write-host "Building ROCm backend libraries"
+            if ($null -ne $script:NINJA_DIR) {
+                $env:PATH="$script:NINJA_DIR;$env:PATH"
+            }
+            $env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
+            $env:HIP_PLATFORM="amd"
+            $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
+            & cmake --fresh --preset "ROCm 6" -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ --install-prefix $script:DIST_DIR
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+            $env:HIPCXX=""
+            $env:HIP_PLATFORM=""
+            $env:CMAKE_PREFIX_PATH=""
+            & cmake --build --preset "ROCm" --parallel $script:JOBS
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+            & cmake --install build --component "HIP" --strip
+            if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+        }
     } else {
         write-host "Skipping generate step with OLLAMA_SKIP_GENERATE set"
     }