Browse Source

Optimize container images for startup (#6547)

* Optimize container images for startup

This change adjusts how to handle runner payloads to support
container builds where we keep them extracted in the filesystem.
This makes it easier to optimize the cpu/cuda vs cpu/rocm images for
size, and should result in faster startup times for container images.

* Refactor payload logic and add buildx support for faster builds

* Move payloads around

* Review comments

* Converge to buildx based helper scripts

* Use docker buildx action for release
Daniel Hiltgen 7 months ago
parent
commit
cd5c8f6471

+ 2 - 0
.dockerignore

@@ -7,3 +7,5 @@ llm/llama.cpp
 .env
 .env
 .cache
 .cache
 test_data
 test_data
+llm/build
+llama/build

+ 180 - 29
.github/workflows/release.yaml

@@ -102,8 +102,8 @@ jobs:
         with:
         with:
           name: generate-windows-cpu
           name: generate-windows-cpu
           path: |
           path: |
-            llm/build/**/bin/*
-            llm/build/**/*.a
+            build/**/*
+            build/**/*.a
             dist/windows-amd64/**
             dist/windows-amd64/**
 
 
   # ROCm generation step
   # ROCm generation step
@@ -176,7 +176,7 @@ jobs:
         with:
         with:
           name: generate-windows-rocm
           name: generate-windows-rocm
           path: |
           path: |
-            llm/build/**/bin/*
+            build/**/*
             dist/windows-amd64/**
             dist/windows-amd64/**
       - uses: actions/upload-artifact@v4
       - uses: actions/upload-artifact@v4
         with:
         with:
@@ -265,7 +265,7 @@ jobs:
         with:
         with:
           name: generate-windows-cuda-${{ matrix.cuda.version }}
           name: generate-windows-cuda-${{ matrix.cuda.version }}
           path: |
           path: |
-            llm/build/**/bin/*
+            build/**/*
             dist/windows-amd64/**
             dist/windows-amd64/**
       - uses: actions/upload-artifact@v4
       - uses: actions/upload-artifact@v4
         with:
         with:
@@ -338,7 +338,7 @@ jobs:
       - uses: actions/download-artifact@v4
       - uses: actions/download-artifact@v4
         with:
         with:
           name: generate-windows-rocm
           name: generate-windows-rocm
-      - run: dir llm/build
+      - run: dir build
       - run: |
       - run: |
           $gopath=(get-command go).source | split-path -parent
           $gopath=(get-command go).source | split-path -parent
           & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
           & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
@@ -359,9 +359,7 @@ jobs:
     environment: release
     environment: release
     runs-on: linux
     runs-on: linux
     env:
     env:
-      OLLAMA_SKIP_MANIFEST_CREATE: '1'
       BUILD_ARCH: amd64
       BUILD_ARCH: amd64
-      PUSH: '1'
     steps:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/checkout@v4
         with:
         with:
@@ -369,14 +367,8 @@ jobs:
       - name: Set Version
       - name: Set Version
         shell: bash
         shell: bash
         run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
         run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
       - run: |
       - run: |
           ./scripts/build_linux.sh
           ./scripts/build_linux.sh
-          ./scripts/build_docker.sh
       - uses: actions/upload-artifact@v4
       - uses: actions/upload-artifact@v4
         with:
         with:
           name: dist-linux-amd64
           name: dist-linux-amd64
@@ -390,9 +382,7 @@ jobs:
     environment: release
     environment: release
     runs-on: linux-arm64
     runs-on: linux-arm64
     env:
     env:
-      OLLAMA_SKIP_MANIFEST_CREATE: '1'
       BUILD_ARCH: arm64
       BUILD_ARCH: arm64
-      PUSH: '1'
     steps:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/checkout@v4
         with:
         with:
@@ -421,14 +411,8 @@ jobs:
           sudo usermod -aG docker $USER
           sudo usermod -aG docker $USER
           sudo apt-get install acl
           sudo apt-get install acl
           sudo setfacl --modify user:$USER:rw /var/run/docker.sock
           sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
       - run: |
       - run: |
           ./scripts/build_linux.sh
           ./scripts/build_linux.sh
-          ./scripts/build_docker.sh
       - uses: actions/upload-artifact@v4
       - uses: actions/upload-artifact@v4
         with:
         with:
           name: dist-linux-arm64
           name: dist-linux-arm64
@@ -436,6 +420,181 @@ jobs:
             dist/*linux*
             dist/*linux*
             !dist/*-cov
             !dist/*-cov
 
 
+  # Container image build
+  build-linux:
+    environment: release
+    strategy:
+      matrix:
+        runner:
+          - linux
+          - linux-arm64
+    runs-on: ${{ matrix.runner }}
+    env:
+      FINAL_IMAGE_REPO: ollama/ollama
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: 'Install Docker'
+        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ca-certificates curl
+          sudo install -m 0755 -d /etc/apt/keyrings
+          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+          sudo chmod a+r /etc/apt/keyrings/docker.asc
+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+          sudo apt-get update
+          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
+          sudo usermod -aG docker $USER
+          sudo apt-get install acl
+          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FINAL_IMAGE_REPO }}
+          flavor: |
+            latest=false
+          tags: |
+            type=ref,event=tag
+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
+            type=semver,pattern={{version}}
+      - name: Set Version
+        shell: bash
+        run: |
+          machine=$(uname -m)
+          case ${machine} in
+            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
+            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
+          esac >>$GITHUB_ENV
+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: "."
+          platforms: linux/${{ env.ARCH }}
+          build-args: |
+            GOFLAGS
+          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+  merge:
+    environment: release
+    runs-on: linux
+    needs:
+      - build-linux
+    env:
+      FINAL_IMAGE_REPO: ollama/ollama
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          path: /tmp/digests
+          pattern: digests-*
+          merge-multiple: true
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FINAL_IMAGE_REPO }}
+          flavor: |
+            latest=false
+          tags: |
+            type=ref,event=tag
+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
+            type=semver,pattern={{version}}
+      - name: Set Version
+        shell: bash
+        run: |
+          machine=$(uname -m)
+          case ${machine} in
+            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
+            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
+          esac >>$GITHUB_ENV
+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}          
+  build-linux-rocm:
+    environment: release
+    runs-on: linux
+    env:
+      FINAL_IMAGE_REPO: ollama/ollama
+      ARCH: amd64
+      PLATFORM_PAIR: linux-amd64
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FINAL_IMAGE_REPO }}
+          flavor: |
+            latest=false
+          tags: |
+            type=ref,event=tag
+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
+            type=semver,pattern={{version}}
+      - name: Set Version
+        shell: bash
+        run: |
+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ vars.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: "."
+          target: runtime-rocm
+          build-args: |
+            GOFLAGS
+          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm,${{ env.FINAL_IMAGE_REPO }}:rocm
+          push: true
+
   # Aggregate all the assets and ship a release
   # Aggregate all the assets and ship a release
   release:
   release:
     needs:
     needs:
@@ -448,8 +607,6 @@ jobs:
     permissions:
     permissions:
       contents: write
       contents: write
     env:
     env:
-      OLLAMA_SKIP_IMAGE_BUILD: '1'
-      PUSH: '1'
       GH_TOKEN: ${{ github.token }}
       GH_TOKEN: ${{ github.token }}
     steps:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/checkout@v4
@@ -458,12 +615,6 @@ jobs:
         run: |
         run: |
           echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
           echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
           echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
           echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - run: ./scripts/build_docker.sh
       - name: Retrieve built artifact
       - name: Retrieve built artifact
         uses: actions/download-artifact@v4
         uses: actions/download-artifact@v4
         with:
         with:

+ 1 - 42
.github/workflows/test.yaml

@@ -81,12 +81,6 @@ jobs:
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
         name: 'Unix Go Generate'
         name: 'Unix Go Generate'
       - run: go build .
       - run: go build .
-      - uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
-          path: |
-            llm/build/**/bin/*
-            llm/build/**/*.a
   generate-cuda:
   generate-cuda:
     needs: [changes]
     needs: [changes]
     if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
     if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
@@ -114,12 +108,6 @@ jobs:
           go generate -x ./...
           go generate -x ./...
         env:
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
           OLLAMA_SKIP_CPU_GENERATE: '1'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: cuda-${{ matrix.cuda-version }}-libraries
-          path: |
-            llm/build/**/bin/*
-            dist/windows-amd64/**
   generate-rocm:
   generate-rocm:
     needs: [changes]
     needs: [changes]
     if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
     if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
@@ -147,12 +135,6 @@ jobs:
           go generate -x ./...
           go generate -x ./...
         env:
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
           OLLAMA_SKIP_CPU_GENERATE: '1'
-      - uses: actions/upload-artifact@v4
-        with:
-          name: rocm-${{ matrix.rocm-version }}-libraries
-          path: |
-            llm/build/**/bin/*
-            dist/windows-amd64/**
 
 
   # ROCm generation step
   # ROCm generation step
   generate-windows-rocm:
   generate-windows-rocm:
@@ -189,7 +171,6 @@ jobs:
         name: go generate
         name: go generate
         env:
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
           OLLAMA_SKIP_CPU_GENERATE: '1'
-      # TODO - do we need any artifacts?
 
 
   # CUDA generation step
   # CUDA generation step
   generate-windows-cuda:
   generate-windows-cuda:
@@ -231,7 +212,6 @@ jobs:
           go generate -x ./...
           go generate -x ./...
         env:
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
           OLLAMA_SKIP_CPU_GENERATE: '1'
-      # TODO - do we need any artifacts?
 
 
   lint:
   lint:
     strategy:
     strategy:
@@ -263,14 +243,6 @@ jobs:
             arm64) echo ARCH=arm64 ;;
             arm64) echo ARCH=arm64 ;;
           esac >>$GITHUB_ENV
           esac >>$GITHUB_ENV
         shell: bash
         shell: bash
-      - run: |
-          mkdir -p llm/build/linux/$ARCH/stub/bin
-          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
-      - run: |
-          mkdir -p llm/build/darwin/$ARCH/stub/bin
-          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'macos-') }}
       - uses: golangci/golangci-lint-action@v6
       - uses: golangci/golangci-lint-action@v6
         with:
         with:
           args: --timeout 8m0s -v
           args: --timeout 8m0s -v
@@ -301,23 +273,10 @@ jobs:
           cache: true
           cache: true
       - run: |
       - run: |
           case ${{ matrix.arch }} in
           case ${{ matrix.arch }} in
-            amd64) echo ARCH=x86_64 ;;
+            amd64) echo ARCH=amd64 ;;
             arm64) echo ARCH=arm64 ;;
             arm64) echo ARCH=arm64 ;;
           esac >>$GITHUB_ENV
           esac >>$GITHUB_ENV
         shell: bash
         shell: bash
-      - run: |
-          mkdir -p llm/build/linux/$ARCH/stub/bin
-          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
-      - run: |
-          mkdir -p llm/build/darwin/$ARCH/stub/bin
-          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
-        if: ${{ startsWith(matrix.os, 'macos-') }}
-        shell: bash
       - run: go generate ./...
       - run: go generate ./...
       - run: go build
       - run: go build
       - run: go test -v ./...
       - run: go test -v ./...
-      - uses: actions/upload-artifact@v4
-        with:
-          name: ${{ matrix.os }}-binaries
-          path: ollama

+ 3 - 0
.gitignore

@@ -12,4 +12,7 @@ ggml-metal.metal
 test_data
 test_data
 *.crt
 *.crt
 llm/build
 llm/build
+build/*/*/*
+!build/**/placeholder
+llama/build
 __debug_bin*
 __debug_bin*

+ 70 - 31
Dockerfile

@@ -47,7 +47,7 @@ RUN --mount=type=cache,target=/root/.ccache \
     OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
     OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
     bash gen_linux.sh
     bash gen_linux.sh
 
 
-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-runner-arm64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -63,7 +63,7 @@ RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
     CUDA_VARIANT="_v11" \
     CUDA_VARIANT="_v11" \
     bash gen_linux.sh
     bash gen_linux.sh
 
 
-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-runner-arm64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
@@ -143,64 +143,103 @@ RUN --mount=type=cache,target=/root/.ccache \
     OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
     OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
 
 
 
 
-# Intermediate stage used for ./scripts/build_linux.sh
+# Intermediate stages used for ./scripts/build_linux.sh
 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
 ENV CGO_ENABLED=1
 ENV CGO_ENABLED=1
 WORKDIR /go/src/github.com/ollama/ollama
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
 COPY . .
-COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
 COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
 ARG GOFLAGS
 ARG GOFLAGS
 ARG CGO_CFLAGS
 ARG CGO_CFLAGS
 RUN --mount=type=cache,target=/root/.ccache \
 RUN --mount=type=cache,target=/root/.ccache \
     go build -trimpath -o dist/linux-amd64/bin/ollama .
     go build -trimpath -o dist/linux-amd64/bin/ollama .
+RUN cd dist/linux-$GOARCH && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
+RUN cd dist/linux-$GOARCH-rocm && \
+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz
 
 
-# Intermediate stage used for ./scripts/build_linux.sh
 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
 ENV CGO_ENABLED=1
 ENV CGO_ENABLED=1
 ARG GOLANG_VERSION
 ARG GOLANG_VERSION
 WORKDIR /go/src/github.com/ollama/ollama
 WORKDIR /go/src/github.com/ollama/ollama
 COPY . .
 COPY . .
-COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
-COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
+COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
 ARG GOFLAGS
 ARG GOFLAGS
 ARG CGO_CFLAGS
 ARG CGO_CFLAGS
 RUN --mount=type=cache,target=/root/.ccache \
 RUN --mount=type=cache,target=/root/.ccache \
     go build -trimpath -o dist/linux-arm64/bin/ollama .
     go build -trimpath -o dist/linux-arm64/bin/ollama .
+RUN cd dist/linux-$GOARCH && \
+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
 
 
-# Strip out ROCm dependencies to keep the primary image lean
-FROM --platform=linux/amd64 ubuntu:22.04 AS amd64-libs-without-rocm
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
-RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
+FROM --platform=linux/amd64 scratch AS dist-amd64
+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
+FROM --platform=linux/arm64 scratch AS dist-arm64
+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
+FROM dist-$TARGETARCH as dist
+
+
+# Optimized container images do not carry nested payloads
+FROM --platform=linux/amd64 static-build-amd64 AS container-build-amd64
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS
+ARG CGO_CFLAGS
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-amd64/bin/ollama .
+
+FROM --platform=linux/arm64 static-build-arm64 AS container-build-arm64
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS
+ARG CGO_CFLAGS
+RUN --mount=type=cache,target=/root/.ccache \
+    go build -trimpath -o dist/linux-arm64/bin/ollama .
 
 
-# Runtime stages
 FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
 FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
-COPY --from=amd64-libs-without-rocm /scratch/ /lib/
-RUN apt-get update && apt-get install -y ca-certificates && \
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
     apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
 
 
 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-RUN apt-get update && apt-get install -y ca-certificates && \
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
     apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
+COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
 
 
-# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
-FROM  rocm/dev-centos-7:${ROCM_VERSION}-complete AS runtime-rocm
-RUN update-pciids
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-RUN ln -s /opt/rocm/lib /lib/ollama
+# ROCm libraries are larger, so we keep them distinct from the CPU/CUDA image
+FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
+# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
+# across releases
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
+RUN apt-get update && \
+    apt-get install -y ca-certificates && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
+COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
 EXPOSE 11434
 EXPOSE 11434
 ENV OLLAMA_HOST=0.0.0.0
 ENV OLLAMA_HOST=0.0.0.0
 
 

+ 1 - 0
build/darwin/amd64/placeholder

@@ -0,0 +1 @@
+This is here to make sure the build/ directory exists for the go:embed command

+ 1 - 0
build/darwin/arm64/placeholder

@@ -0,0 +1 @@
+This is here to make sure the build/ directory exists for the go:embed command

+ 8 - 0
build/embed_darwin_amd64.go

@@ -0,0 +1,8 @@
+package build
+
+import "embed"
+
+// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
+
+//go:embed darwin/amd64/*
+var EmbedFS embed.FS

+ 8 - 0
build/embed_darwin_arm64.go

@@ -0,0 +1,8 @@
+package build
+
+import "embed"
+
+// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
+
+//go:embed darwin/arm64/*
+var EmbedFS embed.FS

+ 6 - 0
build/embed_linux.go

@@ -0,0 +1,6 @@
+package build
+
+import "embed"
+
+//go:embed linux/*
+var EmbedFS embed.FS

+ 8 - 0
build/embed_unused.go

@@ -0,0 +1,8 @@
+//go:build !linux && !darwin
+
+package build
+
+import "embed"
+
+// unused on windows
+var EmbedFS embed.FS

+ 1 - 0
build/linux/amd64/placeholder

@@ -0,0 +1 @@
+This is here to make sure the build/ directory exists for the go:embed command

+ 1 - 0
build/linux/arm64/placeholder

@@ -0,0 +1 @@
+This is here to make sure the build/ directory exists for the go:embed command

+ 0 - 48
envconfig/config.go

@@ -179,53 +179,6 @@ var (
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
 )
 
 
-func RunnersDir() (p string) {
-	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
-		return p
-	}
-
-	if runtime.GOOS != "windows" {
-		return
-	}
-
-	defer func() {
-		if p == "" {
-			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
-		}
-	}()
-
-	// On Windows we do not carry the payloads inside the main executable
-	exe, err := os.Executable()
-	if err != nil {
-		return
-	}
-
-	cwd, err := os.Getwd()
-	if err != nil {
-		return
-	}
-
-	var paths []string
-	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
-		paths = append(paths,
-			root,
-			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
-			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
-		)
-	}
-
-	// Try a few variations to improve developer experience when building from source in the local tree
-	for _, path := range paths {
-		candidate := filepath.Join(path, "lib", "ollama", "runners")
-		if _, err := os.Stat(candidate); err == nil {
-			p = candidate
-			break
-		}
-	}
-
-	return p
-}
-
 func Uint(key string, defaultValue uint) func() uint {
 func Uint(key string, defaultValue uint) func() uint {
 	return func() uint {
 	return func() uint {
 		if s := Var(key); s != "" {
 		if s := Var(key); s != "" {
@@ -290,7 +243,6 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
-		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
 		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
 
 

+ 0 - 148
gpu/assets.go

@@ -1,148 +0,0 @@
-package gpu
-
-import (
-	"errors"
-	"fmt"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"strconv"
-	"strings"
-	"sync"
-	"syscall"
-	"time"
-
-	"github.com/ollama/ollama/envconfig"
-)
-
-var (
-	lock        sync.Mutex
-	payloadsDir = ""
-)
-
-func PayloadsDir() (string, error) {
-	lock.Lock()
-	defer lock.Unlock()
-	var err error
-	if payloadsDir == "" {
-		runnersDir := envconfig.RunnersDir()
-
-		if runnersDir != "" {
-			payloadsDir = runnersDir
-			return payloadsDir, nil
-		}
-
-		// The remainder only applies on non-windows where we still carry payloads in the main executable
-		cleanupTmpDirs()
-		tmpDir := envconfig.TmpDir()
-		if tmpDir == "" {
-			tmpDir, err = os.MkdirTemp("", "ollama")
-			if err != nil {
-				return "", fmt.Errorf("failed to generate tmp dir: %w", err)
-			}
-		} else {
-			err = os.MkdirAll(tmpDir, 0o755)
-			if err != nil {
-				return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
-			}
-		}
-
-		// Track our pid so we can clean up orphaned tmpdirs
-		n := filepath.Join(tmpDir, "ollama.pid")
-		if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
-			return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
-		}
-
-		// We create a distinct subdirectory for payloads within the tmpdir
-		// This will typically look like /tmp/ollama3208993108/runners on linux
-		payloadsDir = filepath.Join(tmpDir, "runners")
-	}
-	return payloadsDir, nil
-}
-
-// Best effort to clean up prior tmpdirs
-func cleanupTmpDirs() {
-	matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
-	if err != nil {
-		return
-	}
-
-	for _, match := range matches {
-		raw, err := os.ReadFile(match)
-		if errors.Is(err, os.ErrNotExist) {
-			slog.Debug("not a ollama runtime directory, skipping", "path", match)
-			continue
-		} else if err != nil {
-			slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
-			continue
-		}
-
-		pid, err := strconv.Atoi(string(raw))
-		if err != nil {
-			slog.Warn("invalid pid, skipping", "path", match, "error", err)
-			continue
-		}
-
-		p, err := os.FindProcess(pid)
-		if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
-			slog.Warn("process still running, skipping", "pid", pid, "path", match)
-			continue
-		}
-
-		if err := os.Remove(match); err != nil {
-			slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
-		}
-
-		runners := filepath.Join(filepath.Dir(match), "runners")
-		if err := os.RemoveAll(runners); err != nil {
-			slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
-		}
-
-		if err := os.Remove(filepath.Dir(match)); err != nil {
-			slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
-		}
-	}
-}
-
-func Cleanup() {
-	lock.Lock()
-	defer lock.Unlock()
-	runnersDir := envconfig.RunnersDir()
-	if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
-		// We want to fully clean up the tmpdir parent of the payloads dir
-		tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
-		slog.Debug("cleaning up", "dir", tmpDir)
-		err := os.RemoveAll(tmpDir)
-		if err != nil {
-			// On windows, if we remove too quickly the llama.dll may still be in-use and fail to remove
-			time.Sleep(1000 * time.Millisecond)
-			err = os.RemoveAll(tmpDir)
-			if err != nil {
-				slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
-			}
-		}
-	}
-}
-
-func UpdatePath(dir string) {
-	if runtime.GOOS == "windows" {
-		tmpDir := filepath.Dir(dir)
-		pathComponents := strings.Split(os.Getenv("PATH"), ";")
-		i := 0
-		for _, comp := range pathComponents {
-			if strings.EqualFold(comp, dir) {
-				return
-			}
-			// Remove any other prior paths to our temp dir
-			if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
-				pathComponents[i] = comp
-				i++
-			}
-		}
-		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
-		slog.Info("updating", "PATH", newPath)
-		os.Setenv("PATH", newPath)
-	}
-	// linux and darwin rely on rpath
-}

+ 3 - 4
gpu/gpu.go

@@ -93,10 +93,9 @@ func initCudaHandles() *cudaHandles {
 		localAppData := os.Getenv("LOCALAPPDATA")
 		localAppData := os.Getenv("LOCALAPPDATA")
 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
 	}
 	}
-	tmpDir, _ := PayloadsDir()
-	if tmpDir != "" {
-		// TODO - add "payloads" for subprocess
-		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
+	libDir := LibraryDir()
+	if libDir != "" {
+		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
 	}
 	}
 	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
 	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
 
 

+ 25 - 5
llm/generate/gen_common.sh

@@ -31,6 +31,7 @@ init_vars() {
         NO_WHOLE_ARCHIVE=""
         NO_WHOLE_ARCHIVE=""
         GCC_ARCH="-arch ${ARCH}"
         GCC_ARCH="-arch ${ARCH}"
         DIST_BASE=../../dist/darwin-${GOARCH}/
         DIST_BASE=../../dist/darwin-${GOARCH}/
+        PAYLOAD_BASE=../../build/darwin/${GOARCH}
         ;;
         ;;
     "Linux")
     "Linux")
         LIB_EXT="so"
         LIB_EXT="so"
@@ -40,6 +41,7 @@ init_vars() {
         # Cross compiling not supported on linux - Use docker
         # Cross compiling not supported on linux - Use docker
         GCC_ARCH=""
         GCC_ARCH=""
         DIST_BASE=../../dist/linux-${GOARCH}/
         DIST_BASE=../../dist/linux-${GOARCH}/
+        PAYLOAD_BASE=../../build/linux/${GOARCH}
         ;;
         ;;
     *)
     *)
         ;;
         ;;
@@ -47,7 +49,8 @@ init_vars() {
     if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
     if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
         CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
         CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
     fi
     fi
-    GZIP=$(which pigz 2>/dev/null || echo "gzip")
+    GZIP=$(command -v pigz 2>/dev/null || echo "gzip")
+    RUNNER_BASE="${DIST_BASE}/lib/ollama/runners"
 }
 }
 
 
 git_module_setup() {
 git_module_setup() {
@@ -91,17 +94,34 @@ build() {
     rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
     rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
 }
 }
 
 
+dist() {
+    [ -z "${RUNNER}" ] && exit 1
+    mkdir -p ${RUNNER_BASE}/${RUNNER}/
+    for f in ${BUILD_DIR}/bin/* ; do
+        cp ${f} ${RUNNER_BASE}/${RUNNER}/
+    done
+    # check for lib directory
+    if [ -d ${BUILD_DIR}/lib ]; then
+        for f in ${BUILD_DIR}/lib/* ; do
+            cp ${f} ${RUNNER_BASE}/${RUNNER}/
+        done
+    fi
+}
+
+# Compress from the build $BUILD_DIR into the $PAYLOAD_BASE/$RUNNER dir
 compress() {
 compress() {
-    echo "Compressing payloads to reduce overall binary size..."
-    rm -rf ${BUILD_DIR}/bin/*.gz
+    [ -z "${RUNNER}" ] && exit 1
+    echo "Compressing payloads with ${GZIP} to reduce overall binary size..."
+    rm -rf "${PAYLOAD_BASE}/${RUNNER}/"
+    mkdir -p "${PAYLOAD_BASE}/${RUNNER}/"
     for f in ${BUILD_DIR}/bin/* ; do
     for f in ${BUILD_DIR}/bin/* ; do
-        ${GZIP} -n --best -f ${f} &
+        ${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" &
         compress_pids+=" $!"
         compress_pids+=" $!"
     done
     done
     # check for lib directory
     # check for lib directory
     if [ -d ${BUILD_DIR}/lib ]; then
     if [ -d ${BUILD_DIR}/lib ]; then
         for f in ${BUILD_DIR}/lib/* ; do
         for f in ${BUILD_DIR}/lib/* ; do
-            ${GZIP} -n --best -f ${f} &
+            ${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" &
             compress_pids+=" $!"
             compress_pids+=" $!"
         done
         done
     fi
     fi

+ 8 - 4
llm/generate/gen_darwin.sh

@@ -39,7 +39,8 @@ case "${GOARCH}" in
         #
         #
         init_vars
         init_vars
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
-        BUILD_DIR="../build/darwin/${ARCH}/cpu"
+        RUNNER=cpu
+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
         echo "Building LCD CPU"
         echo "Building LCD CPU"
         build
         build
         sign ${BUILD_DIR}/bin/ollama_llama_server
         sign ${BUILD_DIR}/bin/ollama_llama_server
@@ -51,7 +52,8 @@ case "${GOARCH}" in
         #
         #
         init_vars
         init_vars
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
-        BUILD_DIR="../build/darwin/${ARCH}/cpu_avx"
+        RUNNER=cpu_avx
+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
         echo "Building AVX CPU"
         echo "Building AVX CPU"
         build
         build
         sign ${BUILD_DIR}/bin/ollama_llama_server
         sign ${BUILD_DIR}/bin/ollama_llama_server
@@ -63,7 +65,8 @@ case "${GOARCH}" in
         #
         #
         init_vars
         init_vars
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=on -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=on -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
-        BUILD_DIR="../build/darwin/${ARCH}/cpu_avx2"
+        RUNNER=cpu_avx2
+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
         echo "Building AVX2 CPU"
         echo "Building AVX2 CPU"
         EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"
         EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"
         build
         build
@@ -84,7 +87,8 @@ case "${GOARCH}" in
     if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then
     if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then
         init_vars
         init_vars
         CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}"
-        BUILD_DIR="../build/darwin/${ARCH}/metal"
+        RUNNER="metal"
+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
         EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
         EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
         build
         build
         sign ${BUILD_DIR}/bin/ollama_llama_server
         sign ${BUILD_DIR}/bin/ollama_llama_server

+ 26 - 9
llm/generate/gen_linux.sh

@@ -79,10 +79,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         init_vars
         init_vars
         echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
         echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
         CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
         CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
-        BUILD_DIR="../build/linux/${ARCH}/cpu"
+        RUNNER="cpu"
+        BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
         echo "Building custom CPU"
         echo "Building custom CPU"
         build
         build
         install
         install
+        dist
         compress
         compress
     else
     else
         # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
         # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
@@ -102,10 +104,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
             #
             #
             init_vars
             init_vars
             CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
             CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
-            BUILD_DIR="../build/linux/${ARCH}/cpu"
+            RUNNER=cpu
+            BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
             echo "Building LCD CPU"
             echo "Building LCD CPU"
             build
             build
             install
             install
+            dist
             compress
             compress
         fi
         fi
 
 
@@ -120,10 +124,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
                 #
                 #
                 init_vars
                 init_vars
                 CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                 CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
-                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
+                RUNNER=cpu_avx
+                BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
                 echo "Building AVX CPU"
                 echo "Building AVX CPU"
                 build
                 build
                 install
                 install
+                dist
                 compress
                 compress
             fi
             fi
 
 
@@ -134,10 +140,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
                 #
                 #
                 init_vars
                 init_vars
                 CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                 CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
-                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
+                RUNNER=cpu_avx2
+                BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
                 echo "Building AVX2 CPU"
                 echo "Building AVX2 CPU"
                 build
                 build
                 install
                 install
+                dist
                 compress
                 compress
             fi
             fi
         fi
         fi
@@ -187,11 +195,13 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
     fi
     fi
     export CUDAFLAGS="-t8"
     export CUDAFLAGS="-t8"
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
-    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
+    RUNNER=cuda${CUDA_VARIANT}
+    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
     export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
     export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
     CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
     CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
     build
     build
     install
     install
+    dist
     echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
     echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
     mkdir -p "${CUDA_DIST_DIR}"
     mkdir -p "${CUDA_DIST_DIR}"
     for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
     for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
@@ -212,7 +222,8 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
     source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
     source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
     CC=icx
     CC=icx
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
-    BUILD_DIR="../build/linux/${ARCH}/oneapi"
+    RUNNER=oneapi
+    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
     ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
     ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
     export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
     export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
     DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
     DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
@@ -231,6 +242,7 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
     cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
     cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
     cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
     cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
     install
     install
+    dist
     compress
     compress
 fi
 fi
 
 
@@ -259,7 +271,8 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
         CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
         CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
         echo "Building custom ROCM GPU"
         echo "Building custom ROCM GPU"
     fi
     fi
-    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
+    RUNNER=rocm${ROCM_VARIANT}
+    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
     # ROCm dependencies are too large to fit into a unified bundle
     # ROCm dependencies are too large to fit into a unified bundle
     ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
     ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
     # TODO figure out how to disable runpath (rpath)
     # TODO figure out how to disable runpath (rpath)
@@ -269,13 +282,17 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
 
 
     # copy the ROCM dependencies
     # copy the ROCM dependencies
     mkdir -p "${ROCM_DIST_DIR}"
     mkdir -p "${ROCM_DIST_DIR}"
-    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
+    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf ); do
         cp -a "${dep}"* "${ROCM_DIST_DIR}"
         cp -a "${dep}"* "${ROCM_DIST_DIR}"
+        if [ $(readlink -f "${dep}") != "${dep}" ] ; then
+            cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}"
+        fi
     done
     done
     install
     install
+    dist
     compress
     compress
 fi
 fi
 
 
 cleanup
 cleanup
 wait_for_compress
 wait_for_compress
-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "go generate completed.  LLM runners: $(cd ${PAYLOAD_BASE}; echo *)"

+ 0 - 4
llm/llm_darwin_arm64.go → llm/llm_darwin.go

@@ -1,11 +1,7 @@
 package llm
 package llm
 
 
 import (
 import (
-	"embed"
 	"syscall"
 	"syscall"
 )
 )
 
 
-//go:embed build/darwin/arm64/*/bin/*
-var libEmbed embed.FS
-
 var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
 var LlamaServerSysProcAttr = &syscall.SysProcAttr{}

+ 0 - 11
llm/llm_darwin_amd64.go

@@ -1,11 +0,0 @@
-package llm
-
-import (
-	"embed"
-	"syscall"
-)
-
-//go:embed build/darwin/x86_64/*/bin/*
-var libEmbed embed.FS
-
-var LlamaServerSysProcAttr = &syscall.SysProcAttr{}

+ 0 - 4
llm/llm_linux.go

@@ -1,11 +1,7 @@
 package llm
 package llm
 
 
 import (
 import (
-	"embed"
 	"syscall"
 	"syscall"
 )
 )
 
 
-//go:embed build/linux/*/*/bin/*
-var libEmbed embed.FS
-
 var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
 var LlamaServerSysProcAttr = &syscall.SysProcAttr{}

+ 0 - 4
llm/llm_windows.go

@@ -1,13 +1,9 @@
 package llm
 package llm
 
 
 import (
 import (
-	"embed"
 	"syscall"
 	"syscall"
 )
 )
 
 
-// unused on windows
-var libEmbed embed.FS
-
 const CREATE_DEFAULT_ERROR_MODE = 0x04000000
 const CREATE_DEFAULT_ERROR_MODE = 0x04000000
 
 
 var LlamaServerSysProcAttr = &syscall.SysProcAttr{
 var LlamaServerSysProcAttr = &syscall.SysProcAttr{

+ 0 - 233
llm/payload.go

@@ -1,233 +0,0 @@
-package llm
-
-import (
-	"compress/gzip"
-	"errors"
-	"fmt"
-	"io"
-	"io/fs"
-	"log/slog"
-	"os"
-	"path/filepath"
-	"runtime"
-	"slices"
-	"strings"
-
-	"golang.org/x/sync/errgroup"
-
-	"github.com/ollama/ollama/gpu"
-)
-
-var errPayloadMissing = errors.New("expected payloads not included in this build of ollama")
-
-func Init() error {
-	payloadsDir, err := gpu.PayloadsDir()
-	if err != nil {
-		return err
-	}
-
-	if runtime.GOOS != "windows" {
-		slog.Info("extracting embedded files", "dir", payloadsDir)
-		binGlob := "build/*/*/*/bin/*"
-
-		// extract server libraries
-		err = extractFiles(payloadsDir, binGlob)
-		if err != nil {
-			return fmt.Errorf("extract binaries: %v", err)
-		}
-	}
-
-	var variants []string
-	for v := range getAvailableServers() {
-		variants = append(variants, v)
-	}
-	slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
-	slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
-
-	return nil
-}
-
-// binary names may contain an optional variant separated by '_'
-// For example, "ollama_rocm_v6" and "ollama_rocm_v5" or "ollama_cpu" and "ollama_cpu_avx2"
-// Any library without a variant is the lowest common denominator
-func getAvailableServers() map[string]string {
-	payloadsDir, err := gpu.PayloadsDir()
-	if err != nil {
-		slog.Error("payload lookup error", "error", err)
-		return nil
-	}
-
-	// glob payloadsDir for files that start with ollama_
-	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
-
-	files, err := filepath.Glob(pattern)
-	if err != nil {
-		slog.Debug("could not glob", "pattern", pattern, "error", err)
-		return nil
-	}
-
-	servers := make(map[string]string)
-	for _, file := range files {
-		slog.Debug("availableServers : found", "file", file)
-		servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
-	}
-
-	return servers
-}
-
-// serversForGpu returns a list of compatible servers give the provided GPU
-// info, ordered by performance. assumes Init() has been called
-// TODO - switch to metadata based mapping
-func serversForGpu(info gpu.GpuInfo) []string {
-	// glob workDir for files that start with ollama_
-	availableServers := getAvailableServers()
-	requested := info.Library
-	if info.Variant != gpu.CPUCapabilityNone.String() {
-		requested += "_" + info.Variant
-	}
-
-	servers := []string{}
-
-	// exact match first
-	for a := range availableServers {
-		if a == requested {
-			servers = []string{a}
-
-			if a == "metal" {
-				return servers
-			}
-
-			break
-		}
-	}
-
-	alt := []string{}
-
-	// Then for GPUs load alternates and sort the list for consistent load ordering
-	if info.Library != "cpu" {
-		for a := range availableServers {
-			if info.Library == strings.Split(a, "_")[0] && a != requested {
-				alt = append(alt, a)
-			}
-		}
-
-		slices.Sort(alt)
-		servers = append(servers, alt...)
-	}
-
-	if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
-		// Load up the best CPU variant if not primary requested
-		if info.Library != "cpu" {
-			variant := gpu.GetCPUCapability()
-			// If no variant, then we fall back to default
-			// If we have a variant, try that if we find an exact match
-			// Attempting to run the wrong CPU instructions will panic the
-			// process
-			if variant != gpu.CPUCapabilityNone {
-				for cmp := range availableServers {
-					if cmp == "cpu_"+variant.String() {
-						servers = append(servers, cmp)
-						break
-					}
-				}
-			} else {
-				servers = append(servers, "cpu")
-			}
-		}
-
-		if len(servers) == 0 {
-			servers = []string{"cpu"}
-		}
-	}
-
-	return servers
-}
-
-// Return the optimal server for this CPU architecture
-func serverForCpu() string {
-	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
-		return "metal"
-	}
-	variant := gpu.GetCPUCapability()
-	availableServers := getAvailableServers()
-	if variant != gpu.CPUCapabilityNone {
-		for cmp := range availableServers {
-			if cmp == "cpu_"+variant.String() {
-				return cmp
-			}
-		}
-	}
-	return "cpu"
-}
-
-// extract extracts the embedded files to the target directory
-func extractFiles(targetDir string, glob string) error {
-	files, err := fs.Glob(libEmbed, glob)
-	if err != nil || len(files) == 0 {
-		return errPayloadMissing
-	}
-
-	if err := os.MkdirAll(targetDir, 0o755); err != nil {
-		return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
-	}
-
-	g := new(errgroup.Group)
-
-	// build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE
-	for _, file := range files {
-		filename := file
-
-		variant := filepath.Base(filepath.Dir(filepath.Dir(filename)))
-
-		slog.Debug("extracting", "variant", variant, "file", filename)
-
-		g.Go(func() error {
-			srcf, err := libEmbed.Open(filename)
-			if err != nil {
-				return err
-			}
-			defer srcf.Close()
-
-			src := io.Reader(srcf)
-			if strings.HasSuffix(filename, ".gz") {
-				src, err = gzip.NewReader(src)
-				if err != nil {
-					return fmt.Errorf("decompress payload %s: %v", filename, err)
-				}
-				filename = strings.TrimSuffix(filename, ".gz")
-			}
-
-			variantDir := filepath.Join(targetDir, variant)
-			if err := os.MkdirAll(variantDir, 0o755); err != nil {
-				return fmt.Errorf("extractFiles could not mkdir %s: %v", variantDir, err)
-			}
-
-			base := filepath.Base(filename)
-			destFilename := filepath.Join(variantDir, base)
-
-			_, err = os.Stat(destFilename)
-			switch {
-			case errors.Is(err, os.ErrNotExist):
-				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-				if err != nil {
-					return fmt.Errorf("write payload %s: %v", filename, err)
-				}
-				defer destFile.Close()
-				if _, err := io.Copy(destFile, src); err != nil {
-					return fmt.Errorf("copy payload %s: %v", filename, err)
-				}
-			case err != nil:
-				return fmt.Errorf("stat payload %s: %v", filename, err)
-			}
-			return nil
-		})
-	}
-
-	err = g.Wait()
-	if err != nil {
-		// If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
-		gpu.Cleanup()
-		return err
-	}
-	return nil
-}

+ 13 - 16
llm/server.go

@@ -24,9 +24,11 @@ import (
 	"golang.org/x/sync/semaphore"
 	"golang.org/x/sync/semaphore"
 
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/build"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/gpu"
+	"github.com/ollama/ollama/runners"
 )
 )
 
 
 type LlamaServer interface {
 type LlamaServer interface {
@@ -106,7 +108,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		gpus = gpu.GetCPUInfo()
 		gpus = gpu.GetCPUInfo()
 	}
 	}
 	if len(gpus) == 1 && gpus[0].Library == "cpu" {
 	if len(gpus) == 1 && gpus[0].Library == "cpu" {
-		cpuRunner = serverForCpu()
+		cpuRunner = runners.ServerForCpu()
 		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
 		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
 	} else {
 	} else {
 		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
 		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
@@ -118,7 +120,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			opts.NumGPU = 0
 			opts.NumGPU = 0
 		case gpus[0].Library != "metal" && estimate.Layers == 0:
 		case gpus[0].Library != "metal" && estimate.Layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
 			// Don't bother loading into the GPU if no layers can fit
-			cpuRunner = serverForCpu()
+			cpuRunner = runners.ServerForCpu()
 			gpus = gpu.GetCPUInfo()
 			gpus = gpu.GetCPUInfo()
 		case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu":
 		case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = estimate.Layers
 			opts.NumGPU = estimate.Layers
@@ -145,25 +147,20 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
 	}
 	}
 
 
-	availableServers := getAvailableServers()
+	rDir, err := runners.Refresh(build.EmbedFS)
+	if err != nil {
+		return nil, err
+	}
+
+	availableServers := runners.GetAvailableServers(rDir)
 	if len(availableServers) == 0 {
 	if len(availableServers) == 0 {
-		if runtime.GOOS != "windows" {
-			slog.Warn("llama server binary disappeared, reinitializing payloads")
-			err = Init()
-			if err != nil {
-				slog.Warn("failed to reinitialize payloads", "error", err)
-				return nil, err
-			}
-			availableServers = getAvailableServers()
-		} else {
-			return nil, finalErr
-		}
+		return nil, finalErr
 	}
 	}
 	var servers []string
 	var servers []string
 	if cpuRunner != "" {
 	if cpuRunner != "" {
 		servers = []string{cpuRunner}
 		servers = []string{cpuRunner}
 	} else {
 	} else {
-		servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
+		servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
 	}
 	}
 	demandLib := envconfig.LLMLibrary()
 	demandLib := envconfig.LLMLibrary()
 	if demandLib != "" {
 	if demandLib != "" {
@@ -330,7 +327,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		_, err := os.Stat(server)
 		_, err := os.Stat(server)
 		if errors.Is(err, os.ErrNotExist) {
 		if errors.Is(err, os.ErrNotExist) {
 			slog.Warn("llama server disappeared, reinitializing payloads", "path", server, "error", err)
 			slog.Warn("llama server disappeared, reinitializing payloads", "path", server, "error", err)
-			err = Init()
+			_, err = runners.Refresh(build.EmbedFS)
 			if err != nil {
 			if err != nil {
 				slog.Warn("failed to reinitialize payloads", "error", err)
 				slog.Warn("failed to reinitialize payloads", "error", err)
 				return nil, err
 				return nil, err

+ 384 - 0
runners/common.go

@@ -0,0 +1,384 @@
+package runners
+
+import (
+	"compress/gzip"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"runtime"
+	"slices"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+
+	"golang.org/x/sync/errgroup"
+
+	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/gpu"
+)
+
const (
	// binGlob matches payload files laid out as $OS/$GOARCH/$RUNNER/$FILE
	// inside the embedded payload filesystem.
	binGlob = "*/*/*/*"
)

var (
	// lock serializes Refresh/Cleanup so concurrent callers cannot race on
	// extraction or removal of the runners directory.
	lock       sync.Mutex
	// runnersDir caches the resolved runners location after the first
	// successful Refresh; empty until then.
	runnersDir = ""
)
+
+// Return the location where runners are stored
+// If runners are payloads, this will either extract them
+// or refresh them if any have disappeared due to tmp cleaners
+func Refresh(payloadFS fs.FS) (string, error) {
+	lock.Lock()
+	defer lock.Unlock()
+	var err error
+
+	// Wire up extra logging on our first load
+	if runnersDir == "" {
+		defer func() {
+			var runners []string
+			for v := range GetAvailableServers(runnersDir) {
+				runners = append(runners, v)
+			}
+			slog.Info("Dynamic LLM libraries", "runners", runners)
+			slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
+		}()
+	}
+
+	if hasPayloads(payloadFS) {
+		if runnersDir == "" {
+			runnersDir, err = extractRunners(payloadFS)
+		} else {
+			err = refreshRunners(payloadFS, runnersDir)
+		}
+	} else if runnersDir == "" {
+		runnersDir, err = locateRunners()
+	}
+
+	return runnersDir, err
+}
+
+func Cleanup(payloadFS fs.FS) {
+	lock.Lock()
+	defer lock.Unlock()
+	if hasPayloads(payloadFS) && runnersDir != "" {
+		// We want to fully clean up the tmpdir parent of the payloads dir
+		tmpDir := filepath.Clean(filepath.Join(runnersDir, ".."))
+		slog.Debug("cleaning up", "dir", tmpDir)
+		err := os.RemoveAll(tmpDir)
+		if err != nil {
+			slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
+		}
+	}
+}
+
+func locateRunners() (string, error) {
+	exe, err := os.Executable()
+	if err != nil {
+		return "", err
+	}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "", err
+	}
+
+	var paths []string
+	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), envconfig.LibRelativeToExe()), cwd} {
+		paths = append(paths,
+			root,
+			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
+			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
+		)
+	}
+
+	// Try a few variations to improve developer experience when building from source in the local tree
+	for _, path := range paths {
+		candidate := filepath.Join(path, "lib", "ollama", "runners")
+		if _, err := os.Stat(candidate); err == nil {
+			return candidate, nil
+		}
+	}
+	return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
+}
+
+// Return true if we're carying nested payloads for the runners
+func hasPayloads(payloadFS fs.FS) bool {
+	files, err := fs.Glob(payloadFS, binGlob)
+	if err != nil || len(files) == 0 || (len(files) == 1 && strings.Contains(files[0], "placeholder")) {
+		return false
+	}
+	return true
+}
+
+func extractRunners(payloadFS fs.FS) (string, error) {
+	cleanupTmpDirs()
+	tmpDir, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
+	if err != nil {
+		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
+	}
+	// Track our pid so we can clean up orphaned tmpdirs
+	n := filepath.Join(tmpDir, "ollama.pid")
+	if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
+		slog.Warn("failed to write pid file", "file", n, "error", err)
+	}
+	// We create a distinct subdirectory for payloads within the tmpdir
+	// This will typically look like /tmp/ollama3208993108/runners on linux
+	rDir := filepath.Join(tmpDir, "runners")
+
+	slog.Info("extracting embedded files", "dir", rDir)
+	return rDir, refreshRunners(payloadFS, rDir)
+}
+
+func refreshRunners(payloadFS fs.FS, rDir string) error {
+	// extract or refresh server libraries
+	err := extractFiles(payloadFS, rDir, binGlob)
+	if err != nil {
+		return fmt.Errorf("extract binaries: %v", err)
+	}
+	return nil
+}
+
+// extract extracts the embedded files to the target directory
+func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
+	files, err := fs.Glob(payloadFS, glob)
+	if err != nil || len(files) == 0 {
+		// Should not happen
+		return fmt.Errorf("extractFiles called without payload present")
+	}
+
+	if err := os.MkdirAll(targetDir, 0o755); err != nil {
+		return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
+	}
+
+	g := new(errgroup.Group)
+
+	// $OS/$GOARCH/$RUNNER/$FILE
+	for _, file := range files {
+		filename := file
+
+		runner := filepath.Base(filepath.Dir(filename))
+
+		slog.Debug("extracting", "runner", runner, "payload", filename)
+
+		g.Go(func() error {
+			srcf, err := payloadFS.Open(filename)
+			if err != nil {
+				return err
+			}
+			defer srcf.Close()
+
+			src := io.Reader(srcf)
+			if strings.HasSuffix(filename, ".gz") {
+				src, err = gzip.NewReader(src)
+				if err != nil {
+					return fmt.Errorf("decompress payload %s: %v", filename, err)
+				}
+				filename = strings.TrimSuffix(filename, ".gz")
+			}
+
+			runnerDir := filepath.Join(targetDir, runner)
+			if err := os.MkdirAll(runnerDir, 0o755); err != nil {
+				return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
+			}
+
+			base := filepath.Base(filename)
+			destFilename := filepath.Join(runnerDir, base)
+
+			_, err = os.Stat(destFilename)
+			switch {
+			case errors.Is(err, os.ErrNotExist):
+				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+				if err != nil {
+					return fmt.Errorf("write payload %s: %v", filename, err)
+				}
+				defer destFile.Close()
+				if _, err := io.Copy(destFile, src); err != nil {
+					return fmt.Errorf("copy payload %s: %v", filename, err)
+				}
+			case err != nil:
+				return fmt.Errorf("stat payload %s: %v", filename, err)
+			}
+			return nil
+		})
+	}
+
+	err = g.Wait()
+	if err != nil {
+		slog.Error("failed to extract files", "error", err)
+		// If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted
+		err := os.RemoveAll(targetDir)
+		if err != nil {
+			slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", err)
+		}
+		return err
+	}
+	return nil
+}
+
+// Best effort to clean up prior tmpdirs
+func cleanupTmpDirs() {
+	tmpDir := envconfig.TmpDir()
+	if tmpDir == "" {
+		tmpDir = os.TempDir()
+	}
+	matches, err := filepath.Glob(filepath.Join(tmpDir, "ollama*", "ollama.pid"))
+	if err != nil {
+		return
+	}
+
+	for _, match := range matches {
+		raw, err := os.ReadFile(match)
+		if errors.Is(err, os.ErrNotExist) {
+			slog.Debug("not a ollama runtime directory, skipping", "path", match)
+			continue
+		} else if err != nil {
+			slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
+			continue
+		}
+
+		pid, err := strconv.Atoi(string(raw))
+		if err != nil {
+			slog.Warn("invalid pid, skipping", "path", match, "error", err)
+			continue
+		}
+
+		p, err := os.FindProcess(pid)
+		if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
+			slog.Warn("process still running, skipping", "pid", pid, "path", match)
+			continue
+		}
+
+		if err := os.Remove(match); err != nil {
+			slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
+		}
+
+		runners := filepath.Join(filepath.Dir(match), "runners")
+		if err := os.RemoveAll(runners); err != nil {
+			slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
+		}
+
+		if err := os.Remove(filepath.Dir(match)); err != nil {
+			slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
+		}
+	}
+}
+
+// directory names are the name of the runner and may contain an optional
+// variant prefixed with '_' as the separator. For example, "cuda_v11" and
+// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
+// lowest common denominator
+func GetAvailableServers(payloadsDir string) map[string]string {
+	if payloadsDir == "" {
+		slog.Error("empty runner dir")
+		return nil
+	}
+
+	// glob payloadsDir for files that start with ollama_
+	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
+
+	files, err := filepath.Glob(pattern)
+	if err != nil {
+		slog.Debug("could not glob", "pattern", pattern, "error", err)
+		return nil
+	}
+
+	servers := make(map[string]string)
+	for _, file := range files {
+		slog.Debug("availableServers : found", "file", file)
+		servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
+	}
+
+	return servers
+}
+
+// serversForGpu returns a list of compatible servers give the provided GPU
+// info, ordered by performance. assumes Init() has been called
+// TODO - switch to metadata based mapping
+func ServersForGpu(info gpu.GpuInfo) []string {
+	// glob workDir for files that start with ollama_
+	availableServers := GetAvailableServers(runnersDir)
+	requested := info.Library
+	if info.Variant != gpu.CPUCapabilityNone.String() {
+		requested += "_" + info.Variant
+	}
+
+	servers := []string{}
+
+	// exact match first
+	for a := range availableServers {
+		if a == requested {
+			servers = []string{a}
+
+			if a == "metal" {
+				return servers
+			}
+
+			break
+		}
+	}
+
+	alt := []string{}
+
+	// Then for GPUs load alternates and sort the list for consistent load ordering
+	if info.Library != "cpu" {
+		for a := range availableServers {
+			if info.Library == strings.Split(a, "_")[0] && a != requested {
+				alt = append(alt, a)
+			}
+		}
+
+		slices.Sort(alt)
+		servers = append(servers, alt...)
+	}
+
+	if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
+		// Load up the best CPU variant if not primary requested
+		if info.Library != "cpu" {
+			variant := gpu.GetCPUCapability()
+			// If no variant, then we fall back to default
+			// If we have a variant, try that if we find an exact match
+			// Attempting to run the wrong CPU instructions will panic the
+			// process
+			if variant != gpu.CPUCapabilityNone {
+				for cmp := range availableServers {
+					if cmp == "cpu_"+variant.String() {
+						servers = append(servers, cmp)
+						break
+					}
+				}
+			} else {
+				servers = append(servers, "cpu")
+			}
+		}
+
+		if len(servers) == 0 {
+			servers = []string{"cpu"}
+		}
+	}
+
+	return servers
+}
+
+// Return the optimal server for this CPU architecture
+func ServerForCpu() string {
+	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
+		return "metal"
+	}
+	variant := gpu.GetCPUCapability()
+	availableServers := GetAvailableServers(runnersDir)
+	if variant != gpu.CPUCapabilityNone {
+		for cmp := range availableServers {
+			if cmp == "cpu_"+variant.String() {
+				return cmp
+			}
+		}
+	}
+	return "cpu"
+}

+ 50 - 0
runners/runners_test.go

@@ -0,0 +1,50 @@
+package runners
+
+import (
+	"log/slog"
+	"os"
+	"path"
+	"runtime"
+	"strings"
+	"testing"
+	"testing/fstest"
+)
+
+func TestRefreshRunners(t *testing.T) {
+	slog.SetLogLoggerLevel(slog.LevelDebug)
+
+	payloadFS := fstest.MapFS{
+		path.Join(runtime.GOOS, runtime.GOARCH, "foo", "ollama_llama_server"): {Data: []byte("hello, world\n")},
+	}
+	tmpDir, err := os.MkdirTemp("", "testing")
+	if err != nil {
+		t.Fatalf("failed to make tmp dir %s", err)
+	}
+	t.Setenv("OLLAMA_TMPDIR", tmpDir)
+	rDir, err := Refresh(payloadFS)
+	if err != nil {
+		t.Fatalf("failed to extract to %s %s", tmpDir, err)
+	}
+	if !strings.Contains(rDir, tmpDir) {
+		t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir)
+	}
+
+	// spot check results
+	servers := GetAvailableServers(rDir)
+	if len(servers) < 1 {
+		t.Fatalf("expected at least 1 server")
+	}
+
+	// Refresh contents
+	rDir, err = extractRunners(payloadFS)
+	if err != nil {
+		t.Fatalf("failed to extract to %s %s", tmpDir, err)
+	}
+	if !strings.Contains(rDir, tmpDir) {
+		t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir)
+	}
+
+	cleanupTmpDirs()
+
+	Cleanup(payloadFS)
+}

+ 1 - 2
scripts/build_darwin.sh

@@ -2,8 +2,7 @@
 
 
 set -e
 set -e
 
 
-export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
-export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
+. $(dirname $0)/env.sh
 
 
 mkdir -p dist
 mkdir -p dist
 
 

+ 21 - 63
scripts/build_docker.sh

@@ -2,76 +2,34 @@
 
 
 set -eu
 set -eu
 
 
-export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
-export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
-
-# We use 2 different image repositories to handle combining architecture images into multiarch manifest
-# (The ROCm image is x86 only and is not a multiarch manifest)
-# For developers, you can override the DOCKER_ORG to generate multiarch manifests
-#  DOCKER_ORG=jdoe PUSH=1 ./scripts/build_docker.sh
-DOCKER_ORG=${DOCKER_ORG:-"ollama"}
-RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
-FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
-
-BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
+. $(dirname $0)/env.sh
 
 
 # Set PUSH to a non-empty string to trigger push instead of load
 # Set PUSH to a non-empty string to trigger push instead of load
 PUSH=${PUSH:-""}
 PUSH=${PUSH:-""}
 
 
-# In CI mode, we break things down
-OLLAMA_SKIP_MANIFEST_CREATE=${OLLAMA_SKIP_MANIFEST_CREATE:-""}
-OLLAMA_SKIP_IMAGE_BUILD=${OLLAMA_SKIP_IMAGE_BUILD:-""}
-
 if [ -z "${PUSH}" ] ; then
 if [ -z "${PUSH}" ] ; then
+    echo "Building ${FINAL_IMAGE_REPO}:$VERSION locally.  set PUSH=1 to push"
     LOAD_OR_PUSH="--load"
     LOAD_OR_PUSH="--load"
 else
 else
-    echo "Will be pushing ${RELEASE_IMAGE_REPO}:$VERSION for ${BUILD_ARCH}"
+    echo "Will be pushing ${FINAL_IMAGE_REPO}:$VERSION"
     LOAD_OR_PUSH="--push"
     LOAD_OR_PUSH="--push"
 fi
 fi
 
 
-if [ -z "${OLLAMA_SKIP_IMAGE_BUILD}" ]; then
-    for TARGETARCH in ${BUILD_ARCH}; do
-        docker build \
-            ${LOAD_OR_PUSH} \
-            --platform=linux/${TARGETARCH} \
-            --build-arg=VERSION \
-            --build-arg=GOFLAGS \
-            -f Dockerfile \
-            -t ${RELEASE_IMAGE_REPO}:$VERSION-${TARGETARCH} \
-            .
-    done
-
-    if echo ${BUILD_ARCH} | grep "amd64" > /dev/null; then
-        docker build \
-            ${LOAD_OR_PUSH} \
-            --platform=linux/amd64 \
-            --build-arg=VERSION \
-            --build-arg=GOFLAGS \
-            --target runtime-rocm \
-            -f Dockerfile \
-            -t ${RELEASE_IMAGE_REPO}:$VERSION-rocm \
-            .
-    fi
-fi
-
-if [ -z "${OLLAMA_SKIP_MANIFEST_CREATE}" ]; then
-    if [ -n "${PUSH}" ]; then
-        docker manifest create ${FINAL_IMAGE_REPO}:$VERSION \
-            ${RELEASE_IMAGE_REPO}:$VERSION-amd64 \
-            ${RELEASE_IMAGE_REPO}:$VERSION-arm64
-        docker manifest push ${FINAL_IMAGE_REPO}:$VERSION
-
-        # For symmetry, tag/push the rocm image
-        if [ "${RELEASE_IMAGE_REPO}" != "${FINAL_IMAGE_REPO}" ]; then
-            echo "Tagging and pushing rocm image"
-            docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm
-            docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:$VERSION-rocm
-            docker push ${FINAL_IMAGE_REPO}:$VERSION-rocm
-        fi
-    else
-        echo "Skipping manifest generation when not pushing images are available locally as "
-        echo "  ${RELEASE_IMAGE_REPO}:$VERSION-amd64"
-        echo "  ${RELEASE_IMAGE_REPO}:$VERSION-arm64"
-        echo "  ${RELEASE_IMAGE_REPO}:$VERSION-rocm"
-    fi
-fi
# Build the multi-arch image and either load it into the local docker
# daemon or push it, depending on LOAD_OR_PUSH.
docker buildx build \
    ${LOAD_OR_PUSH} \
    --platform=${PLATFORM} \
    ${OLLAMA_COMMON_BUILD_ARGS} \
    -f Dockerfile \
    -t ${FINAL_IMAGE_REPO}:$VERSION \
    .

# The ROCm image is x86-only; build it only when amd64 is among the
# requested platforms.
if echo $PLATFORM | grep "amd64" > /dev/null; then
    docker buildx build \
        ${LOAD_OR_PUSH} \
        --platform=linux/amd64 \
        ${OLLAMA_COMMON_BUILD_ARGS} \
        --target runtime-rocm \
        -f Dockerfile \
        -t ${FINAL_IMAGE_REPO}:$VERSION-rocm \
        .
fi

+ 20 - 28
scripts/build_linux.sh

@@ -1,37 +1,29 @@
 #!/bin/sh
 #!/bin/sh
+#
+# Mac ARM users, rosetta can be flaky, so to use a remote x86 builder
+#
+# docker context create amd64 --docker host=ssh://mybuildhost
+# docker buildx create --name mybuilder amd64 --platform linux/amd64
+# docker buildx create --name mybuilder --append desktop-linux --platform linux/arm64
+# docker buildx use mybuilder
+
 
 
 set -eu
 set -eu
 
 
-export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
-export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
-GZIP=$(which pigz 2>/dev/null || echo "gzip")
+. $(dirname $0)/env.sh
 
 
-BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
-export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
 mkdir -p dist
 mkdir -p dist
 
 
-for TARGETARCH in ${BUILD_ARCH}; do
-    docker build \
-        --platform=linux/$TARGETARCH \
-        --build-arg=GOFLAGS \
-        --build-arg=CGO_CFLAGS \
-        --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
-        --build-arg=AMDGPU_TARGETS \
-        --target build-$TARGETARCH \
+docker buildx build \
+        --output type=local,dest=./dist/ \
+        --platform=${PLATFORM} \
+        ${OLLAMA_COMMON_BUILD_ARGS} \
+        --target dist \
         -f Dockerfile \
         -f Dockerfile \
-        -t builder:$TARGETARCH \
         .
         .
-    docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
-    rm -rf ./dist/linux-$TARGETARCH
-    docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
-    if echo ${TARGETARCH} | grep "amd64" > /dev/null; then
-        docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH-rocm ./dist
-    fi
-    docker rm builder-$TARGETARCH
-    echo "Compressing final linux bundle..."
-    rm -f ./dist/ollama-linux-$TARGETARCH.tgz
-    (cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz )
-    if [ -d dist/linux-$TARGETARCH-rocm ]; then
-        (cd dist/linux-$TARGETARCH-rocm && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH-rocm.tgz )
-    fi
-done
+
+# buildx behavior changes for single vs. multiplatform
+if echo $PLATFORM | grep "," > /dev/null ; then 
+        mv -f ./dist/linux_*64/ollama* ./dist/
+        rmdir ./dist/linux_*64
+fi

+ 14 - 0
scripts/env.sh

@@ -0,0 +1,14 @@
+# Common environment setup across build*.sh scripts
+
+export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
+export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
+# TODO - consider `docker buildx ls --format=json` to autodiscover platform capability
+PLATFORM=${PLATFORM:-"linux/arm64,linux/amd64"}
+DOCKER_ORG=${DOCKER_ORG:-"ollama"}
+RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
+FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
+OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION --build-arg=GOFLAGS --build-arg=OLLAMA_CUSTOM_CPU_DEFS --build-arg=AMDGPU_TARGETS"
+
+echo "Building Ollama"
+echo "VERSION=$VERSION"
+echo "PLATFORM=$PLATFORM"

+ 5 - 3
server/routes.go

@@ -26,11 +26,13 @@ import (
 	"golang.org/x/sync/errgroup"
 	"golang.org/x/sync/errgroup"
 
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/build"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/gpu"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/parser"
+	"github.com/ollama/ollama/runners"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
 	"github.com/ollama/ollama/types/model"
@@ -1216,12 +1218,12 @@ func Serve(ln net.Listener) error {
 		srvr.Close()
 		srvr.Close()
 		schedDone()
 		schedDone()
 		sched.unloadAllRunners()
 		sched.unloadAllRunners()
-		gpu.Cleanup()
+		runners.Cleanup(build.EmbedFS)
 		done()
 		done()
 	}()
 	}()
 
 
-	if err := llm.Init(); err != nil {
-		return fmt.Errorf("unable to initialize llm library %w", err)
+	if _, err := runners.Refresh(build.EmbedFS); err != nil {
+		return fmt.Errorf("unable to initialize llm runners %w", err)
 	}
 	}
 
 
 	s.sched.Run(schedCtx)
 	s.sched.Run(schedCtx)