7 mēneši atpakaļ · cd5c8f6471
--- a/.dockerignore
+++ b/.dockerignore
@@ -7,3 +7,5 @@ llm/llama.cpp
 
				 .env
			
 
				 .cache
			
 
				 test_data
			
 
				+llm/build
			
 
				+llama/build
			
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -102,8 +102,8 @@ jobs:
 
				         with:
			
 
				           name: generate-windows-cpu
			
 
				           path: |
			
 
				-            llm/build/**/bin/*
			
 
				-            llm/build/**/*.a
			
 
				+            build/**/*
			
 
				+            build/**/*.a
			
 
				             dist/windows-amd64/**
			
 
				 
			
 
				   # ROCm generation step
			
@@ -176,7 +176,7 @@ jobs:
 
				         with:
			
 
				           name: generate-windows-rocm
			
 
				           path: |
			
 
				-            llm/build/**/bin/*
			
 
				+            build/**/*
			
 
				             dist/windows-amd64/**
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
@@ -265,7 +265,7 @@ jobs:
 
				         with:
			
 
				           name: generate-windows-cuda-${{ matrix.cuda.version }}
			
 
				           path: |
			
 
				-            llm/build/**/bin/*
			
 
				+            build/**/*
			
 
				             dist/windows-amd64/**
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
@@ -338,7 +338,7 @@ jobs:
 
				       - uses: actions/download-artifact@v4
			
 
				         with:
			
 
				           name: generate-windows-rocm
			
 
				-      - run: dir llm/build
			
 
				+      - run: dir build
			
 
				       - run: |
			
 
				           $gopath=(get-command go).source | split-path -parent
			
 
				           & "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Launch-VsDevShell.ps1"
			
@@ -359,9 +359,7 @@ jobs:
 
				     environment: release
			
 
				     runs-on: linux
			
 
				     env:
			
 
				-      OLLAMA_SKIP_MANIFEST_CREATE: '1'
			
 
				       BUILD_ARCH: amd64
			
 
				-      PUSH: '1'
			
 
				     steps:
			
 
				       - uses: actions/checkout@v4
			
 
				         with:
			
@@ -369,14 +367,8 @@ jobs:
 
				       - name: Set Version
			
 
				         shell: bash
			
 
				         run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
			
 
				-      - name: Login to Docker Hub
			
 
				-        uses: docker/login-action@v3
			
 
				-        with:
			
 
				-          username: ${{ vars.DOCKER_USER }}
			
 
				-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
			
 
				       - run: |
			
 
				           ./scripts/build_linux.sh
			
 
				-          ./scripts/build_docker.sh
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				           name: dist-linux-amd64
			
@@ -390,9 +382,7 @@ jobs:
 
				     environment: release
			
 
				     runs-on: linux-arm64
			
 
				     env:
			
 
				-      OLLAMA_SKIP_MANIFEST_CREATE: '1'
			
 
				       BUILD_ARCH: arm64
			
 
				-      PUSH: '1'
			
 
				     steps:
			
 
				       - uses: actions/checkout@v4
			
 
				         with:
			
@@ -421,14 +411,8 @@ jobs:
 
				           sudo usermod -aG docker $USER
			
 
				           sudo apt-get install acl
			
 
				           sudo setfacl --modify user:$USER:rw /var/run/docker.sock
			
 
				-      - name: Login to Docker Hub
			
 
				-        uses: docker/login-action@v3
			
 
				-        with:
			
 
				-          username: ${{ vars.DOCKER_USER }}
			
 
				-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
			
 
				       - run: |
			
 
				           ./scripts/build_linux.sh
			
 
				-          ./scripts/build_docker.sh
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				           name: dist-linux-arm64
			
@@ -436,6 +420,181 @@ jobs:
 
				             dist/*linux*
			
 
				             !dist/*-cov
			
 
				 
			
 
				+  # Container image build
			
 
				+  build-linux:
			
 
				+    environment: release
			
 
				+    strategy:
			
 
				+      matrix:
			
 
				+        runner:
			
 
				+          - linux
			
 
				+          - linux-arm64
			
 
				+    runs-on: ${{ matrix.runner }}
			
 
				+    env:
			
 
				+      FINAL_IMAGE_REPO: ollama/ollama
			
 
				+    steps:
			
 
				+      - uses: actions/checkout@v4
			
 
				+        with:
			
 
				+          submodules: recursive
			
 
				+      - name: 'Install Docker'
			
 
				+        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
			
 
				+        run: |
			
 
				+          sudo apt-get update
			
 
				+          sudo apt-get install -y ca-certificates curl
			
 
				+          sudo install -m 0755 -d /etc/apt/keyrings
			
 
				+          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
			
 
				+          sudo chmod a+r /etc/apt/keyrings/docker.asc
			
 
				+          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
			
 
				+            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
			
 
				+            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
			
 
				+          sudo apt-get update
			
 
				+          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
			
 
				+          sudo usermod -aG docker $USER
			
 
				+          sudo apt-get install acl
			
 
				+          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
			
 
				+      - name: Docker meta
			
 
				+        id: meta
			
 
				+        uses: docker/metadata-action@v5
			
 
				+        with:
			
 
				+          images: ${{ env.FINAL_IMAGE_REPO }}
			
 
				+          flavor: |
			
 
				+            latest=false
			
 
				+          tags: |
			
 
				+            type=ref,event=tag
			
 
				+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
			
 
				+            type=semver,pattern={{version}}
			
 
				+      - name: Set Version
			
 
				+        shell: bash
			
 
				+        run: |
			
 
				+          machine=$(uname -m)
			
 
				+          case ${machine} in
			
 
				+            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
			
 
				+            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
			
 
				+          esac >>$GITHUB_ENV
			
 
				+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
			
 
				+      - name: Set up Docker Buildx
			
 
				+        uses: docker/setup-buildx-action@v3
			
 
				+      - name: Login to Docker Hub
			
 
				+        uses: docker/login-action@v3
			
 
				+        with:
			
 
				+          username: ${{ vars.DOCKER_USER }}
			
 
				+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
			
 
				+      - name: Build and push by digest
			
 
				+        id: build
			
 
				+        uses: docker/build-push-action@v6
			
 
				+        with:
			
 
				+          context: "."
			
 
				+          platforms: linux/${{ env.ARCH }}
			
 
				+          build-args: |
			
 
				+            GOFLAGS
			
 
				+          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
			
 
				+      - name: Export digest
			
 
				+        run: |
			
 
				+          mkdir -p /tmp/digests
			
 
				+          digest="${{ steps.build.outputs.digest }}"
			
 
				+          touch "/tmp/digests/${digest#sha256:}"
			
 
				+      - name: Upload digest
			
 
				+        uses: actions/upload-artifact@v4
			
 
				+        with:
			
 
				+          name: digests-${{ env.PLATFORM_PAIR }}
			
 
				+          path: /tmp/digests/*
			
 
				+          if-no-files-found: error
			
 
				+          retention-days: 1
			
 
				+  merge:
			
 
				+    environment: release
			
 
				+    runs-on: linux
			
 
				+    needs:
			
 
				+      - build-linux
			
 
				+    env:
			
 
				+      FINAL_IMAGE_REPO: ollama/ollama
			
 
				+    steps:
			
 
				+      - uses: actions/checkout@v4
			
 
				+        with:
			
 
				+          submodules: recursive
			
 
				+      - name: Download digests
			
 
				+        uses: actions/download-artifact@v4
			
 
				+        with:
			
 
				+          path: /tmp/digests
			
 
				+          pattern: digests-*
			
 
				+          merge-multiple: true
			
 
				+      - name: Set up Docker Buildx
			
 
				+        uses: docker/setup-buildx-action@v3
			
 
				+      - name: Docker meta
			
 
				+        id: meta
			
 
				+        uses: docker/metadata-action@v5
			
 
				+        with:
			
 
				+          images: ${{ env.FINAL_IMAGE_REPO }}
			
 
				+          flavor: |
			
 
				+            latest=false
			
 
				+          tags: |
			
 
				+            type=ref,event=tag
			
 
				+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
			
 
				+            type=semver,pattern={{version}}
			
 
				+      - name: Set Version
			
 
				+        shell: bash
			
 
				+        run: |
			
 
				+          machine=$(uname -m)
			
 
				+          case ${machine} in
			
 
				+            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
			
 
				+            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
			
 
				+          esac >>$GITHUB_ENV
			
 
				+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
			
 
				+      - name: Login to Docker Hub
			
 
				+        uses: docker/login-action@v3
			
 
				+        with:
			
 
				+          username: ${{ vars.DOCKER_USER }}
			
 
				+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
			
 
				+      - name: Create manifest list and push
			
 
				+        working-directory: /tmp/digests
			
 
				+        run: |
			
 
				+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
			
 
				+            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
			
 
				+      - name: Inspect image
			
 
				+        run: |
			
 
				+          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}          
			
 
				+  build-linux-rocm:
			
 
				+    environment: release
			
 
				+    runs-on: linux
			
 
				+    env:
			
 
				+      FINAL_IMAGE_REPO: ollama/ollama
			
 
				+      ARCH: amd64
			
 
				+      PLATFORM_PAIR: linux-amd64
			
 
				+    steps:
			
 
				+      - uses: actions/checkout@v4
			
 
				+        with:
			
 
				+          submodules: recursive
			
 
				+      - name: Docker meta
			
 
				+        id: meta
			
 
				+        uses: docker/metadata-action@v5
			
 
				+        with:
			
 
				+          images: ${{ env.FINAL_IMAGE_REPO }}
			
 
				+          flavor: |
			
 
				+            latest=false
			
 
				+          tags: |
			
 
				+            type=ref,event=tag
			
 
				+            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
			
 
				+            type=semver,pattern={{version}}
			
 
				+      - name: Set Version
			
 
				+        shell: bash
			
 
				+        run: |
			
 
				+          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
			
 
				+      - name: Set up Docker Buildx
			
 
				+        uses: docker/setup-buildx-action@v3
			
 
				+      - name: Login to Docker Hub
			
 
				+        uses: docker/login-action@v3
			
 
				+        with:
			
 
				+          username: ${{ vars.DOCKER_USER }}
			
 
				+          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
			
 
				+      - name: Build and push by digest
			
 
				+        id: build
			
 
				+        uses: docker/build-push-action@v6
			
 
				+        with:
			
 
				+          context: "."
			
 
				+          target: runtime-rocm
			
 
				+          build-args: |
			
 
				+            GOFLAGS
			
 
				+          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm,${{ env.FINAL_IMAGE_REPO }}:rocm
			
 
				+          push: true
			
 
				+
			
 
				   # Aggregate all the assets and ship a release
			
 
				   release:
			
 
				     needs:
			
@@ -448,8 +607,6 @@ jobs:
 
				     permissions:
			
 
				       contents: write
			
 
				     env:
			
 
				-      OLLAMA_SKIP_IMAGE_BUILD: '1'
			
 
				-      PUSH: '1'
			
 
				       GH_TOKEN: ${{ github.token }}
			
 
				     steps:
			
 
				       - uses: actions/checkout@v4
			
@@ -458,12 +615,6 @@ jobs:
 
				         run: |
			
 
				           echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
			
 
				           echo "RELEASE_VERSION=$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)" >> $GITHUB_ENV
			
 
				-      - name: Login to Docker Hub
			
 
				-        uses: docker/login-action@v3
			
 
				-        with:
			
 
				-          username: ${{ vars.DOCKER_USER }}
			
 
				-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
			
 
				-      - run: ./scripts/build_docker.sh
			
 
				       - name: Retrieve built artifact
			
 
				         uses: actions/download-artifact@v4
			
 
				         with:
			
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -81,12 +81,6 @@ jobs:
 
				         if: ${{ ! startsWith(matrix.os, 'windows-') }}
			
 
				         name: 'Unix Go Generate'
			
 
				       - run: go build .
			
 
				-      - uses: actions/upload-artifact@v4
			
 
				-        with:
			
 
				-          name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
			
 
				-          path: |
			
 
				-            llm/build/**/bin/*
			
 
				-            llm/build/**/*.a
			
 
				   generate-cuda:
			
 
				     needs: [changes]
			
 
				     if: ${{ needs.changes.outputs.GENERATE_CUDA == 'True' }}
			
@@ -114,12 +108,6 @@ jobs:
 
				           go generate -x ./...
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				-      - uses: actions/upload-artifact@v4
			
 
				-        with:
			
 
				-          name: cuda-${{ matrix.cuda-version }}-libraries
			
 
				-          path: |
			
 
				-            llm/build/**/bin/*
			
 
				-            dist/windows-amd64/**
			
 
				   generate-rocm:
			
 
				     needs: [changes]
			
 
				     if: ${{ needs.changes.outputs.GENERATE_ROCM == 'True' }}
			
@@ -147,12 +135,6 @@ jobs:
 
				           go generate -x ./...
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				-      - uses: actions/upload-artifact@v4
			
 
				-        with:
			
 
				-          name: rocm-${{ matrix.rocm-version }}-libraries
			
 
				-          path: |
			
 
				-            llm/build/**/bin/*
			
 
				-            dist/windows-amd64/**
			
 
				 
			
 
				   # ROCm generation step
			
 
				   generate-windows-rocm:
			
@@ -189,7 +171,6 @@ jobs:
 
				         name: go generate
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				-      # TODO - do we need any artifacts?
			
 
				 
			
 
				   # CUDA generation step
			
 
				   generate-windows-cuda:
			
@@ -231,7 +212,6 @@ jobs:
 
				           go generate -x ./...
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				-      # TODO - do we need any artifacts?
			
 
				 
			
 
				   lint:
			
 
				     strategy:
			
@@ -263,14 +243,6 @@ jobs:
 
				             arm64) echo ARCH=arm64 ;;
			
 
				           esac >>$GITHUB_ENV
			
 
				         shell: bash
			
 
				-      - run: |
			
 
				-          mkdir -p llm/build/linux/$ARCH/stub/bin
			
 
				-          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
			
 
				-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
			
 
				-      - run: |
			
 
				-          mkdir -p llm/build/darwin/$ARCH/stub/bin
			
 
				-          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
			
 
				-        if: ${{ startsWith(matrix.os, 'macos-') }}
			
 
				       - uses: golangci/golangci-lint-action@v6
			
 
				         with:
			
 
				           args: --timeout 8m0s -v
			
@@ -301,23 +273,10 @@ jobs:
 
				           cache: true
			
 
				       - run: |
			
 
				           case ${{ matrix.arch }} in
			
 
				-            amd64) echo ARCH=x86_64 ;;
			
 
				+            amd64) echo ARCH=amd64 ;;
			
 
				             arm64) echo ARCH=arm64 ;;
			
 
				           esac >>$GITHUB_ENV
			
 
				         shell: bash
			
 
				-      - run: |
			
 
				-          mkdir -p llm/build/linux/$ARCH/stub/bin
			
 
				-          touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server
			
 
				-        if: ${{ startsWith(matrix.os, 'ubuntu-') }}
			
 
				-      - run: |
			
 
				-          mkdir -p llm/build/darwin/$ARCH/stub/bin
			
 
				-          touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
			
 
				-        if: ${{ startsWith(matrix.os, 'macos-') }}
			
 
				-        shell: bash
			
 
				       - run: go generate ./...
			
 
				       - run: go build
			
 
				       - run: go test -v ./...
			
 
				-      - uses: actions/upload-artifact@v4
			
 
				-        with:
			
 
				-          name: ${{ matrix.os }}-binaries
			
 
				-          path: ollama
			
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,7 @@ ggml-metal.metal
 
				 test_data
			
 
				 *.crt
			
 
				 llm/build
			
 
				+build/*/*/*
			
 
				+!build/**/placeholder
			
 
				+llama/build
			
 
				 __debug_bin*
			
--- a/Dockerfile
+++ b/Dockerfile
@@ -47,7 +47,7 @@ RUN --mount=type=cache,target=/root/.ccache \
 
				     OLLAMA_CUSTOM_CUDA_DEFS="-DGGML_CUDA_USE_GRAPHS=on" \
			
 
				     bash gen_linux.sh
			
 
				 
			
 
				-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-server-arm64
			
 
				+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_11-devel-rockylinux8 AS cuda-11-build-runner-arm64
			
 
				 ARG CMAKE_VERSION
			
 
				 COPY ./scripts/rh_linux_deps.sh /
			
 
				 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
			
@@ -63,7 +63,7 @@ RUN OLLAMA_SKIP_STATIC_GENERATE=1 \
 
				     CUDA_VARIANT="_v11" \
			
 
				     bash gen_linux.sh
			
 
				 
			
 
				-FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-server-arm64
			
 
				+FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION_12-devel-rockylinux8 AS cuda-12-build-runner-arm64
			
 
				 ARG CMAKE_VERSION
			
 
				 COPY ./scripts/rh_linux_deps.sh /
			
 
				 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
			
@@ -143,64 +143,103 @@ RUN --mount=type=cache,target=/root/.ccache \
 
				     OLLAMA_SKIP_STATIC_GENERATE=1 OLLAMA_CPU_TARGET="cpu" bash gen_linux.sh
			
 
				 
			
 
				 
			
 
				-# Intermediate stage used for ./scripts/build_linux.sh
			
 
				+# Intermediate stages used for ./scripts/build_linux.sh
			
 
				 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
			
 
				 ENV CGO_ENABLED=1
			
 
				 WORKDIR /go/src/github.com/ollama/ollama
			
 
				 COPY . .
			
 
				-COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				-COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				-COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				+COPY --from=static-build-amd64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/
			
 
				+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				 COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				-COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				 COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				-COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				 COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				-COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				 ARG GOFLAGS
			
 
				 ARG CGO_CFLAGS
			
 
				 RUN --mount=type=cache,target=/root/.ccache \
			
 
				     go build -trimpath -o dist/linux-amd64/bin/ollama .
			
 
				+RUN cd dist/linux-$GOARCH && \
			
 
				+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
			
 
				+RUN cd dist/linux-$GOARCH-rocm && \
			
 
				+    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz
			
 
				 
			
 
				-# Intermediate stage used for ./scripts/build_linux.sh
			
 
				 FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
			
 
				 ENV CGO_ENABLED=1
			
 
				 ARG GOLANG_VERSION
			
 
				 WORKDIR /go/src/github.com/ollama/ollama
			
 
				 COPY . .
			
 
				-COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				-COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				-COPY --from=cuda-11-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				-COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				-COPY --from=cuda-12-build-server-arm64 /go/src/github.com/ollama/ollama/llm/build/linux/ llm/build/linux/
			
 
				+COPY --from=static-build-arm64 /go/src/github.com/ollama/ollama/llm/build/ llm/build/
			
 
				+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
			
 
				+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/build/ build/
			
 
				 ARG GOFLAGS
			
 
				 ARG CGO_CFLAGS
			
 
				 RUN --mount=type=cache,target=/root/.ccache \
			
 
				     go build -trimpath -o dist/linux-arm64/bin/ollama .
			
 
				+RUN cd dist/linux-$GOARCH && \
			
 
				+    tar --exclude runners -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
			
 
				 
			
 
				-# Strip out ROCm dependencies to keep the primary image lean
			
 
				-FROM --platform=linux/amd64 ubuntu:22.04 AS amd64-libs-without-rocm
			
 
				-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /scratch/
			
 
				-RUN cd /scratch/ollama/ && rm -rf rocblas libamd* libdrm* libroc* libhip* libhsa*
			
 
				+FROM --platform=linux/amd64 scratch AS dist-amd64
			
 
				+COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
			
 
				+FROM --platform=linux/arm64 scratch AS dist-arm64
			
 
				+COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
			
 
				+FROM dist-$TARGETARCH as dist
			
 
				+
			
 
				+
			
 
				+# Optimized container images do not carry nested payloads
			
 
				+FROM --platform=linux/amd64 static-build-amd64 AS container-build-amd64
			
 
				+WORKDIR /go/src/github.com/ollama/ollama
			
 
				+COPY . .
			
 
				+ARG GOFLAGS
			
 
				+ARG CGO_CFLAGS
			
 
				+RUN --mount=type=cache,target=/root/.ccache \
			
 
				+    go build -trimpath -o dist/linux-amd64/bin/ollama .
			
 
				+
			
 
				+FROM --platform=linux/arm64 static-build-arm64 AS container-build-arm64
			
 
				+WORKDIR /go/src/github.com/ollama/ollama
			
 
				+COPY . .
			
 
				+ARG GOFLAGS
			
 
				+ARG CGO_CFLAGS
			
 
				+RUN --mount=type=cache,target=/root/.ccache \
			
 
				+    go build -trimpath -o dist/linux-arm64/bin/ollama .
			
 
				 
			
 
				-# Runtime stages
			
 
				 FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
			
 
				-COPY --from=amd64-libs-without-rocm /scratch/ /lib/
			
 
				-RUN apt-get update && apt-get install -y ca-certificates && \
			
 
				+RUN apt-get update && \
			
 
				+    apt-get install -y ca-certificates && \
			
 
				     apt-get clean && rm -rf /var/lib/apt/lists/*
			
 
				-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
			
 
				+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
			
 
				+COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=cuda-11-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=cuda-12-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				 
			
 
				 FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
			
 
				-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
			
 
				-RUN apt-get update && apt-get install -y ca-certificates && \
			
 
				+RUN apt-get update && \
			
 
				+    apt-get install -y ca-certificates && \
			
 
				     apt-get clean && rm -rf /var/lib/apt/lists/*
			
 
				-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
			
 
				+COPY --from=container-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
			
 
				+COPY --from=cpu-build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
			
 
				+COPY --from=cuda-11-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
			
 
				+COPY --from=cuda-12-build-runner-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
			
 
				 
			
 
				-# Radeon images are much larger so we keep it distinct from the CPU/CUDA image
			
 
				-FROM  rocm/dev-centos-7:${ROCM_VERSION}-complete AS runtime-rocm
			
 
				-RUN update-pciids
			
 
				-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
			
 
				-RUN ln -s /opt/rocm/lib /lib/ollama
			
 
				+# ROCm libraries are larger, so we keep this image distinct from the CPU/CUDA image
			
 
				+FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
			
 
				+# Frontload the ROCm libraries, which are large and rarely change, to increase the chance of a common layer
			
 
				+# across releases
			
 
				+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
			
 
				+RUN apt-get update && \
			
 
				+    apt-get install -y ca-certificates && \
			
 
				+    apt-get clean && rm -rf /var/lib/apt/lists/*
			
 
				+COPY --from=container-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
			
 
				+COPY --from=cpu-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=cpu_avx-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=cpu_avx2-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				+COPY --from=rocm-build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
			
 
				 EXPOSE 11434
			
 
				 ENV OLLAMA_HOST=0.0.0.0
			
 
				 
			
--- a/build/darwin/amd64/placeholder
+++ b/build/darwin/amd64/placeholder
@@ -0,0 +1 @@
 
				+This is here to make sure the build/ directory exists for the go:embed command
			
--- a/build/darwin/arm64/placeholder
+++ b/build/darwin/arm64/placeholder
@@ -0,0 +1 @@
 
				+This is here to make sure the build/ directory exists for the go:embed command
			
--- a/build/embed_darwin_amd64.go
+++ b/build/embed_darwin_amd64.go
@@ -0,0 +1,8 @@
 
				+package build
			
 
				+
			
 
				+import "embed"
			
 
				+
			
 
				+// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
			
 
				+
			
 
				+//go:embed darwin/amd64/*
			
 
				+var EmbedFS embed.FS
			
--- a/build/embed_darwin_arm64.go
+++ b/build/embed_darwin_arm64.go
@@ -0,0 +1,8 @@
 
				+package build
			
 
				+
			
 
				+import "embed"
			
 
				+
			
 
				+// Darwin payloads separated by architecture to avoid duplicate payloads when cross compiling
			
 
				+
			
 
				+//go:embed darwin/arm64/*
			
 
				+var EmbedFS embed.FS
			
--- a/build/embed_linux.go
+++ b/build/embed_linux.go
@@ -0,0 +1,6 @@
 
				+package build
			
 
				+
			
 
				+import "embed"
			
 
				+
			
 
				+//go:embed linux/*
			
 
				+var EmbedFS embed.FS
			
--- a/build/embed_unused.go
+++ b/build/embed_unused.go
@@ -0,0 +1,8 @@
 
				+//go:build !linux && !darwin
			
 
				+
			
 
				+package build
			
 
				+
			
 
				+import "embed"
			
 
				+
			
 
				+// unused on windows
			
 
				+var EmbedFS embed.FS
			
--- a/build/linux/amd64/placeholder
+++ b/build/linux/amd64/placeholder
@@ -0,0 +1 @@
 
				+This is here to make sure the build/ directory exists for the go:embed command
			
--- a/build/linux/arm64/placeholder
+++ b/build/linux/arm64/placeholder
@@ -0,0 +1 @@
 
				+This is here to make sure the build/ directory exists for the go:embed command
			
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -179,53 +179,6 @@ var (
 
				 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
			
 
				 )
			
 
				 
			
 
				-func RunnersDir() (p string) {
			
 
				-	if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
			
 
				-		return p
			
 
				-	}
			
 
				-
			
 
				-	if runtime.GOOS != "windows" {
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	defer func() {
			
 
				-		if p == "" {
			
 
				-			slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
			
 
				-		}
			
 
				-	}()
			
 
				-
			
 
				-	// On Windows we do not carry the payloads inside the main executable
			
 
				-	exe, err := os.Executable()
			
 
				-	if err != nil {
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	cwd, err := os.Getwd()
			
 
				-	if err != nil {
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	var paths []string
			
 
				-	for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
			
 
				-		paths = append(paths,
			
 
				-			root,
			
 
				-			filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
			
 
				-			filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
			
 
				-		)
			
 
				-	}
			
 
				-
			
 
				-	// Try a few variations to improve developer experience when building from source in the local tree
			
 
				-	for _, path := range paths {
			
 
				-		candidate := filepath.Join(path, "lib", "ollama", "runners")
			
 
				-		if _, err := os.Stat(candidate); err == nil {
			
 
				-			p = candidate
			
 
				-			break
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return p
			
 
				-}
			
 
				-
			
 
				 func Uint(key string, defaultValue uint) func() uint {
			
 
				 	return func() uint {
			
 
				 		if s := Var(key); s != "" {
			
@@ -290,7 +243,6 @@ func AsMap() map[string]EnvVar {
 
				 		"OLLAMA_NOPRUNE":           {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
			
 
				 		"OLLAMA_NUM_PARALLEL":      {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
			
 
				 		"OLLAMA_ORIGINS":           {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
			
 
				-		"OLLAMA_RUNNERS_DIR":       {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
			
 
				 		"OLLAMA_SCHED_SPREAD":      {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
			
 
				 		"OLLAMA_TMPDIR":            {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
			
 
				 
			
--- a/gpu/assets.go
+++ b/gpu/assets.go
@@ -1,148 +0,0 @@
 
				-package gpu
			
 
				-
			
 
				-import (
			
 
				-	"errors"
			
 
				-	"fmt"
			
 
				-	"log/slog"
			
 
				-	"os"
			
 
				-	"path/filepath"
			
 
				-	"runtime"
			
 
				-	"strconv"
			
 
				-	"strings"
			
 
				-	"sync"
			
 
				-	"syscall"
			
 
				-	"time"
			
 
				-
			
 
				-	"github.com/ollama/ollama/envconfig"
			
 
				-)
			
 
				-
			
 
				-var (
			
 
				-	lock        sync.Mutex
			
 
				-	payloadsDir = ""
			
 
				-)
			
 
				-
			
 
				-func PayloadsDir() (string, error) {
			
 
				-	lock.Lock()
			
 
				-	defer lock.Unlock()
			
 
				-	var err error
			
 
				-	if payloadsDir == "" {
			
 
				-		runnersDir := envconfig.RunnersDir()
			
 
				-
			
 
				-		if runnersDir != "" {
			
 
				-			payloadsDir = runnersDir
			
 
				-			return payloadsDir, nil
			
 
				-		}
			
 
				-
			
 
				-		// The remainder only applies on non-windows where we still carry payloads in the main executable
			
 
				-		cleanupTmpDirs()
			
 
				-		tmpDir := envconfig.TmpDir()
			
 
				-		if tmpDir == "" {
			
 
				-			tmpDir, err = os.MkdirTemp("", "ollama")
			
 
				-			if err != nil {
			
 
				-				return "", fmt.Errorf("failed to generate tmp dir: %w", err)
			
 
				-			}
			
 
				-		} else {
			
 
				-			err = os.MkdirAll(tmpDir, 0o755)
			
 
				-			if err != nil {
			
 
				-				return "", fmt.Errorf("failed to generate tmp dir %s: %w", tmpDir, err)
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		// Track our pid so we can clean up orphaned tmpdirs
			
 
				-		n := filepath.Join(tmpDir, "ollama.pid")
			
 
				-		if err := os.WriteFile(n, []byte(strconv.Itoa(os.Getpid())), 0o644); err != nil {
			
 
				-			return "", fmt.Errorf("failed to write pid file %s: %w", n, err)
			
 
				-		}
			
 
				-
			
 
				-		// We create a distinct subdirectory for payloads within the tmpdir
			
 
				-		// This will typically look like /tmp/ollama3208993108/runners on linux
			
 
				-		payloadsDir = filepath.Join(tmpDir, "runners")
			
 
				-	}
			
 
				-	return payloadsDir, nil
			
 
				-}
			
 
				-
			
 
				-// Best effort to clean up prior tmpdirs
			
 
				-func cleanupTmpDirs() {
			
 
				-	matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*", "ollama.pid"))
			
 
				-	if err != nil {
			
 
				-		return
			
 
				-	}
			
 
				-
			
 
				-	for _, match := range matches {
			
 
				-		raw, err := os.ReadFile(match)
			
 
				-		if errors.Is(err, os.ErrNotExist) {
			
 
				-			slog.Debug("not a ollama runtime directory, skipping", "path", match)
			
 
				-			continue
			
 
				-		} else if err != nil {
			
 
				-			slog.Warn("could not read ollama.pid, skipping", "path", match, "error", err)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		pid, err := strconv.Atoi(string(raw))
			
 
				-		if err != nil {
			
 
				-			slog.Warn("invalid pid, skipping", "path", match, "error", err)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		p, err := os.FindProcess(pid)
			
 
				-		if err == nil && !errors.Is(p.Signal(syscall.Signal(0)), os.ErrProcessDone) {
			
 
				-			slog.Warn("process still running, skipping", "pid", pid, "path", match)
			
 
				-			continue
			
 
				-		}
			
 
				-
			
 
				-		if err := os.Remove(match); err != nil {
			
 
				-			slog.Warn("could not cleanup stale pidfile", "path", match, "error", err)
			
 
				-		}
			
 
				-
			
 
				-		runners := filepath.Join(filepath.Dir(match), "runners")
			
 
				-		if err := os.RemoveAll(runners); err != nil {
			
 
				-			slog.Warn("could not cleanup stale runners", "path", runners, "error", err)
			
 
				-		}
			
 
				-
			
 
				-		if err := os.Remove(filepath.Dir(match)); err != nil {
			
 
				-			slog.Warn("could not cleanup stale tmpdir", "path", filepath.Dir(match), "error", err)
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func Cleanup() {
			
 
				-	lock.Lock()
			
 
				-	defer lock.Unlock()
			
 
				-	runnersDir := envconfig.RunnersDir()
			
 
				-	if payloadsDir != "" && runnersDir == "" && runtime.GOOS != "windows" {
			
 
				-		// We want to fully clean up the tmpdir parent of the payloads dir
			
 
				-		tmpDir := filepath.Clean(filepath.Join(payloadsDir, ".."))
			
 
				-		slog.Debug("cleaning up", "dir", tmpDir)
			
 
				-		err := os.RemoveAll(tmpDir)
			
 
				-		if err != nil {
			
 
				-			// On windows, if we remove too quickly the llama.dll may still be in-use and fail to remove
			
 
				-			time.Sleep(1000 * time.Millisecond)
			
 
				-			err = os.RemoveAll(tmpDir)
			
 
				-			if err != nil {
			
 
				-				slog.Warn("failed to clean up", "dir", tmpDir, "err", err)
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-func UpdatePath(dir string) {
			
 
				-	if runtime.GOOS == "windows" {
			
 
				-		tmpDir := filepath.Dir(dir)
			
 
				-		pathComponents := strings.Split(os.Getenv("PATH"), ";")
			
 
				-		i := 0
			
 
				-		for _, comp := range pathComponents {
			
 
				-			if strings.EqualFold(comp, dir) {
			
 
				-				return
			
 
				-			}
			
 
				-			// Remove any other prior paths to our temp dir
			
 
				-			if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) {
			
 
				-				pathComponents[i] = comp
			
 
				-				i++
			
 
				-			}
			
 
				-		}
			
 
				-		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
			
 
				-		slog.Info("updating", "PATH", newPath)
			
 
				-		os.Setenv("PATH", newPath)
			
 
				-	}
			
 
				-	// linux and darwin rely on rpath
			
 
				-}
			
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -93,10 +93,9 @@ func initCudaHandles() *cudaHandles {
 
				 		localAppData := os.Getenv("LOCALAPPDATA")
			
 
				 		cudartMgmtPatterns = []string{filepath.Join(localAppData, "Programs", "Ollama", CudartMgmtName)}
			
 
				 	}
			
 
				-	tmpDir, _ := PayloadsDir()
			
 
				-	if tmpDir != "" {
			
 
				-		// TODO - add "payloads" for subprocess
			
 
				-		cudartMgmtPatterns = []string{filepath.Join(tmpDir, "cuda*", CudartMgmtName)}
			
 
				+	libDir := LibraryDir()
			
 
				+	if libDir != "" {
			
 
				+		cudartMgmtPatterns = []string{filepath.Join(libDir, CudartMgmtName)}
			
 
				 	}
			
 
				 	cudartMgmtPatterns = append(cudartMgmtPatterns, CudartGlobs...)
			
 
				 
			
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -31,6 +31,7 @@ init_vars() {
 
				         NO_WHOLE_ARCHIVE=""
			
 
				         GCC_ARCH="-arch ${ARCH}"
			
 
				         DIST_BASE=../../dist/darwin-${GOARCH}/
			
 
				+        PAYLOAD_BASE=../../build/darwin/${GOARCH}
			
 
				         ;;
			
 
				     "Linux")
			
 
				         LIB_EXT="so"
			
@@ -40,6 +41,7 @@ init_vars() {
 
				         # Cross compiling not supported on linux - Use docker
			
 
				         GCC_ARCH=""
			
 
				         DIST_BASE=../../dist/linux-${GOARCH}/
			
 
				+        PAYLOAD_BASE=../../build/linux/${GOARCH}
			
 
				         ;;
			
 
				     *)
			
 
				         ;;
			
@@ -47,7 +49,8 @@ init_vars() {
 
				     if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
			
 
				         CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
			
 
				     fi
			
 
				-    GZIP=$(which pigz 2>/dev/null || echo "gzip")
			
 
				+    GZIP=$(command -v pigz 2>/dev/null || echo "gzip")
			
 
				+    RUNNER_BASE="${DIST_BASE}/lib/ollama/runners"
			
 
				 }
			
 
				 
			
 
				 git_module_setup() {
			
@@ -91,17 +94,34 @@ build() {
 
				     rm -f ${BUILD_DIR}/bin/ggml-common.h ${BUILD_DIR}/bin/ggml-metal.metal
			
 
				 }
			
 
				 
			
 
				+dist() {
			
 
				+    [ -z "${RUNNER}" ] && exit 1
			
 
				+    mkdir -p ${RUNNER_BASE}/${RUNNER}/
			
 
				+    for f in ${BUILD_DIR}/bin/* ; do
			
 
				+        cp ${f} ${RUNNER_BASE}/${RUNNER}/
			
 
				+    done
			
 
				+    # check for lib directory
			
 
				+    if [ -d ${BUILD_DIR}/lib ]; then
			
 
				+        for f in ${BUILD_DIR}/lib/* ; do
			
 
				+            cp ${f} ${RUNNER_BASE}/${RUNNER}/
			
 
				+        done
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+# Compress from the build $BUILD_DIR into the $PAYLOAD_BASE/$RUNNER dir
			
 
				 compress() {
			
 
				-    echo "Compressing payloads to reduce overall binary size..."
			
 
				-    rm -rf ${BUILD_DIR}/bin/*.gz
			
 
				+    [ -z "${RUNNER}" ] && exit 1
			
 
				+    echo "Compressing payloads with ${GZIP} to reduce overall binary size..."
			
 
				+    rm -rf "${PAYLOAD_BASE}/${RUNNER}/"
			
 
				+    mkdir -p "${PAYLOAD_BASE}/${RUNNER}/"
			
 
				     for f in ${BUILD_DIR}/bin/* ; do
			
 
				-        ${GZIP} -n --best -f ${f} &
			
 
				+        ${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" &
			
 
				         compress_pids+=" $!"
			
 
				     done
			
 
				     # check for lib directory
			
 
				     if [ -d ${BUILD_DIR}/lib ]; then
			
 
				         for f in ${BUILD_DIR}/lib/* ; do
			
 
				-            ${GZIP} -n --best -f ${f} &
			
 
				+            ${GZIP} -c --best ${f} > "${PAYLOAD_BASE}/${RUNNER}/$(basename ${f}).gz" &
			
 
				             compress_pids+=" $!"
			
 
				         done
			
 
				     fi
			
--- a/llm/generate/gen_darwin.sh
+++ b/llm/generate/gen_darwin.sh
@@ -39,7 +39,8 @@ case "${GOARCH}" in
 
				         #
			
 
				         init_vars
			
 
				         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
			
 
				-        BUILD_DIR="../build/darwin/${ARCH}/cpu"
			
 
				+        RUNNER=cpu
			
 
				+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
			
 
				         echo "Building LCD CPU"
			
 
				         build
			
 
				         sign ${BUILD_DIR}/bin/ollama_llama_server
			
@@ -51,7 +52,8 @@ case "${GOARCH}" in
 
				         #
			
 
				         init_vars
			
 
				         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
			
 
				-        BUILD_DIR="../build/darwin/${ARCH}/cpu_avx"
			
 
				+        RUNNER=cpu_avx
			
 
				+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
			
 
				         echo "Building AVX CPU"
			
 
				         build
			
 
				         sign ${BUILD_DIR}/bin/ollama_llama_server
			
@@ -63,7 +65,8 @@ case "${GOARCH}" in
 
				         #
			
 
				         init_vars
			
 
				         CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=on -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
			
 
				-        BUILD_DIR="../build/darwin/${ARCH}/cpu_avx2"
			
 
				+        RUNNER=cpu_avx2
			
 
				+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
			
 
				         echo "Building AVX2 CPU"
			
 
				         EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"
			
 
				         build
			
@@ -84,7 +87,8 @@ case "${GOARCH}" in
 
				     if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then
			
 
				         init_vars
			
 
				         CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}"
			
 
				-        BUILD_DIR="../build/darwin/${ARCH}/metal"
			
 
				+        RUNNER="metal"
			
 
				+        BUILD_DIR="../build/darwin/${ARCH}/${RUNNER}"
			
 
				         EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
			
 
				         build
			
 
				         sign ${BUILD_DIR}/bin/ollama_llama_server
			
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -79,10 +79,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
 
				         init_vars
			
 
				         echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
			
 
				         CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
			
 
				-        BUILD_DIR="../build/linux/${ARCH}/cpu"
			
 
				+        RUNNER="cpu"
			
 
				+        BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				         echo "Building custom CPU"
			
 
				         build
			
 
				         install
			
 
				+        dist
			
 
				         compress
			
 
				     else
			
 
				         # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
			
@@ -102,10 +104,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
 
				             #
			
 
				             init_vars
			
 
				             CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
			
 
				-            BUILD_DIR="../build/linux/${ARCH}/cpu"
			
 
				+            RUNNER=cpu
			
 
				+            BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				             echo "Building LCD CPU"
			
 
				             build
			
 
				             install
			
 
				+            dist
			
 
				             compress
			
 
				         fi
			
 
				 
			
@@ -120,10 +124,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
 
				                 #
			
 
				                 init_vars
			
 
				                 CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
			
 
				-                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
			
 
				+                RUNNER=cpu_avx
			
 
				+                BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				                 echo "Building AVX CPU"
			
 
				                 build
			
 
				                 install
			
 
				+                dist
			
 
				                 compress
			
 
				             fi
			
 
				 
			
@@ -134,10 +140,12 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
 
				                 #
			
 
				                 init_vars
			
 
				                 CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
			
 
				-                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
			
 
				+                RUNNER=cpu_avx2
			
 
				+                BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				                 echo "Building AVX2 CPU"
			
 
				                 build
			
 
				                 install
			
 
				+                dist
			
 
				                 compress
			
 
				             fi
			
 
				         fi
			
@@ -187,11 +195,13 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
 
				     fi
			
 
				     export CUDAFLAGS="-t8"
			
 
				     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
			
 
				-    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
			
 
				+    RUNNER=cuda${CUDA_VARIANT}
			
 
				+    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				     export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
			
 
				     CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
			
 
				     build
			
 
				     install
			
 
				+    dist
			
 
				     echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
			
 
				     mkdir -p "${CUDA_DIST_DIR}"
			
 
				     for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
			
@@ -212,7 +222,8 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
 
				     source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
			
 
				     CC=icx
			
 
				     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
			
 
				-    BUILD_DIR="../build/linux/${ARCH}/oneapi"
			
 
				+    RUNNER=oneapi
			
 
				+    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				     ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
			
 
				     export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
			
 
				     DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
			
@@ -231,6 +242,7 @@ if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
 
				     cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
			
 
				     cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
			
 
				     install
			
 
				+    dist
			
 
				     compress
			
 
				 fi
			
 
				 
			
@@ -259,7 +271,8 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
 
				         CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
			
 
				         echo "Building custom ROCM GPU"
			
 
				     fi
			
 
				-    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
			
 
				+    RUNNER=rocm${ROCM_VARIANT}
			
 
				+    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
			
 
				     # ROCm dependencies are too large to fit into a unified bundle
			
 
				     ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
			
 
				     # TODO figure out how to disable runpath (rpath)
			
@@ -269,13 +282,17 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
 
				 
			
 
				     # copy the ROCM dependencies
			
 
				     mkdir -p "${ROCM_DIST_DIR}"
			
 
				-    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
			
 
				+    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf ); do
			
 
				         cp -a "${dep}"* "${ROCM_DIST_DIR}"
			
 
				+        if [ $(readlink -f "${dep}") != "${dep}" ] ; then
			
 
				+            cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}"
			
 
				+        fi
			
 
				     done
			
 
				     install
			
 
				+    dist
			
 
				     compress
			
 
				 fi
			
 
				 
			
 
				 cleanup
			
 
				 wait_for_compress
			
 
				-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
			
 
				+echo "go generate completed.  LLM runners: $(cd ${PAYLOAD_BASE}; echo *)"
			
--- a/llm/llm_darwin_arm64.go
+++ b/llm/llm_darwin_arm64.go
@@ -1,11 +1,7 @@
 
				 package llm
			
 
				 
			
 
				 import (
			
 
				-	"embed"
			
 
				 	"syscall"
			
 
				 )
			
 
				 
			
 
				-//go:embed build/darwin/arm64/*/bin/*
			
 
				-var libEmbed embed.FS
			
 
				-
			
 
				 var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
			
--- a/llm/llm_darwin_amd64.go
+++ b/llm/llm_darwin_amd64.go
@@ -1,11 +0,0 @@
 
				-package llm
			
 
				-
			
 
				-import (
			
 
				-	"embed"
			
 
				-	"syscall"
			
 
				-)
			
 
				-
			
 
				-//go:embed build/darwin/x86_64/*/bin/*
			
 
				-var libEmbed embed.FS
			
 
				-
			
 
				-var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
			
--- a/llm/llm_linux.go
+++ b/llm/llm_linux.go
@@ -1,11 +1,7 @@
 
				 package llm
			
 
				 
			
 
				 import (
			
 
				-	"embed"
			
 
				 	"syscall"
			
 
				 )
			
 
				 
			
 
				-//go:embed build/linux/*/*/bin/*
			
 
				-var libEmbed embed.FS
			
 
				-
			
 
				 var LlamaServerSysProcAttr = &syscall.SysProcAttr{}
			
--- a/llm/llm_windows.go
+++ b/llm/llm_windows.go
@@ -1,13 +1,9 @@
 
				 package llm
			
 
				 
			
 
				 import (
			
 
				-	"embed"
			
 
				 	"syscall"
			
 
				 )
			
 
				 
			
 
				-// unused on windows
			
 
				-var libEmbed embed.FS
			
 
				-
			
 
				 const CREATE_DEFAULT_ERROR_MODE = 0x04000000
			
 
				 
			
 
				 var LlamaServerSysProcAttr = &syscall.SysProcAttr{
			
--- a/llm/payload.go
+++ b/llm/payload.go
@@ -1,233 +0,0 @@
 
				-package llm
			
 
				-
			
 
				-import (
			
 
				-	"compress/gzip"
			
 
				-	"errors"
			
 
				-	"fmt"
			
 
				-	"io"
			
 
				-	"io/fs"
			
 
				-	"log/slog"
			
 
				-	"os"
			
 
				-	"path/filepath"
			
 
				-	"runtime"
			
 
				-	"slices"
			
 
				-	"strings"
			
 
				-
			
 
				-	"golang.org/x/sync/errgroup"
			
 
				-
			
 
				-	"github.com/ollama/ollama/gpu"
			
 
				-)
			
 
				-
			
 
				-var errPayloadMissing = errors.New("expected payloads not included in this build of ollama")
			
 
				-
			
 
				-func Init() error {
			
 
				-	payloadsDir, err := gpu.PayloadsDir()
			
 
				-	if err != nil {
			
 
				-		return err
			
 
				-	}
			
 
				-
			
 
				-	if runtime.GOOS != "windows" {
			
 
				-		slog.Info("extracting embedded files", "dir", payloadsDir)
			
 
				-		binGlob := "build/*/*/*/bin/*"
			
 
				-
			
 
				-		// extract server libraries
			
 
				-		err = extractFiles(payloadsDir, binGlob)
			
 
				-		if err != nil {
			
 
				-			return fmt.Errorf("extract binaries: %v", err)
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	var variants []string
			
 
				-	for v := range getAvailableServers() {
			
 
				-		variants = append(variants, v)
			
 
				-	}
			
 
				-	slog.Info(fmt.Sprintf("Dynamic LLM libraries %v", variants))
			
 
				-	slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
			
 
				-
			
 
				-	return nil
			
 
				-}
			
 
				-
			
 
				-// binary names may contain an optional variant separated by '_'
			
 
				-// For example, "ollama_rocm_v6" and "ollama_rocm_v5" or "ollama_cpu" and "ollama_cpu_avx2"
			
 
				-// Any library without a variant is the lowest common denominator
			
 
				-func getAvailableServers() map[string]string {
			
 
				-	payloadsDir, err := gpu.PayloadsDir()
			
 
				-	if err != nil {
			
 
				-		slog.Error("payload lookup error", "error", err)
			
 
				-		return nil
			
 
				-	}
			
 
				-
			
 
				-	// glob payloadsDir for files that start with ollama_
			
 
				-	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
			
 
				-
			
 
				-	files, err := filepath.Glob(pattern)
			
 
				-	if err != nil {
			
 
				-		slog.Debug("could not glob", "pattern", pattern, "error", err)
			
 
				-		return nil
			
 
				-	}
			
 
				-
			
 
				-	servers := make(map[string]string)
			
 
				-	for _, file := range files {
			
 
				-		slog.Debug("availableServers : found", "file", file)
			
 
				-		servers[filepath.Base(filepath.Dir(file))] = filepath.Dir(file)
			
 
				-	}
			
 
				-
			
 
				-	return servers
			
 
				-}
			
 
				-
			
 
				-// serversForGpu returns a list of compatible servers give the provided GPU
			
 
				-// info, ordered by performance. assumes Init() has been called
			
 
				-// TODO - switch to metadata based mapping
			
 
				-func serversForGpu(info gpu.GpuInfo) []string {
			
 
				-	// glob workDir for files that start with ollama_
			
 
				-	availableServers := getAvailableServers()
			
 
				-	requested := info.Library
			
 
				-	if info.Variant != gpu.CPUCapabilityNone.String() {
			
 
				-		requested += "_" + info.Variant
			
 
				-	}
			
 
				-
			
 
				-	servers := []string{}
			
 
				-
			
 
				-	// exact match first
			
 
				-	for a := range availableServers {
			
 
				-		if a == requested {
			
 
				-			servers = []string{a}
			
 
				-
			
 
				-			if a == "metal" {
			
 
				-				return servers
			
 
				-			}
			
 
				-
			
 
				-			break
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	alt := []string{}
			
 
				-
			
 
				-	// Then for GPUs load alternates and sort the list for consistent load ordering
			
 
				-	if info.Library != "cpu" {
			
 
				-		for a := range availableServers {
			
 
				-			if info.Library == strings.Split(a, "_")[0] && a != requested {
			
 
				-				alt = append(alt, a)
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		slices.Sort(alt)
			
 
				-		servers = append(servers, alt...)
			
 
				-	}
			
 
				-
			
 
				-	if !(runtime.GOOS == "darwin" && runtime.GOARCH == "arm64") {
			
 
				-		// Load up the best CPU variant if not primary requested
			
 
				-		if info.Library != "cpu" {
			
 
				-			variant := gpu.GetCPUCapability()
			
 
				-			// If no variant, then we fall back to default
			
 
				-			// If we have a variant, try that if we find an exact match
			
 
				-			// Attempting to run the wrong CPU instructions will panic the
			
 
				-			// process
			
 
				-			if variant != gpu.CPUCapabilityNone {
			
 
				-				for cmp := range availableServers {
			
 
				-					if cmp == "cpu_"+variant.String() {
			
 
				-						servers = append(servers, cmp)
			
 
				-						break
			
 
				-					}
			
 
				-				}
			
 
				-			} else {
			
 
				-				servers = append(servers, "cpu")
			
 
				-			}
			
 
				-		}
			
 
				-
			
 
				-		if len(servers) == 0 {
			
 
				-			servers = []string{"cpu"}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	return servers
			
 
				-}
			
 
				-
			
 
				-// Return the optimal server for this CPU architecture
			
 
				-func serverForCpu() string {
			
 
				-	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
			
 
				-		return "metal"
			
 
				-	}
			
 
				-	variant := gpu.GetCPUCapability()
			
 
				-	availableServers := getAvailableServers()
			
 
				-	if variant != gpu.CPUCapabilityNone {
			
 
				-		for cmp := range availableServers {
			
 
				-			if cmp == "cpu_"+variant.String() {
			
 
				-				return cmp
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-	return "cpu"
			
 
				-}
			
 
				-
			
 
				-// extract extracts the embedded files to the target directory
			
 
				-func extractFiles(targetDir string, glob string) error {
			
 
				-	files, err := fs.Glob(libEmbed, glob)
			
 
				-	if err != nil || len(files) == 0 {
			
 
				-		return errPayloadMissing
			
 
				-	}
			
 
				-
			
 
				-	if err := os.MkdirAll(targetDir, 0o755); err != nil {
			
 
				-		return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
			
 
				-	}
			
 
				-
			
 
				-	g := new(errgroup.Group)
			
 
				-
			
 
				-	// build/$OS/$GOARCH/$VARIANT/{bin,lib}/$FILE
			
 
				-	for _, file := range files {
			
 
				-		filename := file
			
 
				-
			
 
				-		variant := filepath.Base(filepath.Dir(filepath.Dir(filename)))
			
 
				-
			
 
				-		slog.Debug("extracting", "variant", variant, "file", filename)
			
 
				-
			
 
				-		g.Go(func() error {
			
 
				-			srcf, err := libEmbed.Open(filename)
			
 
				-			if err != nil {
			
 
				-				return err
			
 
				-			}
			
 
				-			defer srcf.Close()
			
 
				-
			
 
				-			src := io.Reader(srcf)
			
 
				-			if strings.HasSuffix(filename, ".gz") {
			
 
				-				src, err = gzip.NewReader(src)
			
 
				-				if err != nil {
			
 
				-					return fmt.Errorf("decompress payload %s: %v", filename, err)
			
 
				-				}
			
 
				-				filename = strings.TrimSuffix(filename, ".gz")
			
 
				-			}
			
 
				-
			
 
				-			variantDir := filepath.Join(targetDir, variant)
			
 
				-			if err := os.MkdirAll(variantDir, 0o755); err != nil {
			
 
				-				return fmt.Errorf("extractFiles could not mkdir %s: %v", variantDir, err)
			
 
				-			}
			
 
				-
			
 
				-			base := filepath.Base(filename)
			
 
				-			destFilename := filepath.Join(variantDir, base)
			
 
				-
			
 
				-			_, err = os.Stat(destFilename)
			
 
				-			switch {
			
 
				-			case errors.Is(err, os.ErrNotExist):
			
 
				-				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
			
 
				-				if err != nil {
			
 
				-					return fmt.Errorf("write payload %s: %v", filename, err)
			
 
				-				}
			
 
				-				defer destFile.Close()
			
 
				-				if _, err := io.Copy(destFile, src); err != nil {
			
 
				-					return fmt.Errorf("copy payload %s: %v", filename, err)
			
 
				-				}
			
 
				-			case err != nil:
			
 
				-				return fmt.Errorf("stat payload %s: %v", filename, err)
			
 
				-			}
			
 
				-			return nil
			
 
				-		})
			
 
				-	}
			
 
				-
			
 
				-	err = g.Wait()
			
 
				-	if err != nil {
			
 
				-		// If we fail to extract, the payload dir is unusable, so cleanup whatever we extracted
			
 
				-		gpu.Cleanup()
			
 
				-		return err
			
 
				-	}
			
 
				-	return nil
			
 
				-}
			
--- a/llm/server.go
+++ b/llm/server.go
@@ -24,9 +24,11 @@ import (
 
				 	"golang.org/x/sync/semaphore"
			
 
				 
			
 
				 	"github.com/ollama/ollama/api"
			
 
				+	"github.com/ollama/ollama/build"
			
 
				 	"github.com/ollama/ollama/envconfig"
			
 
				 	"github.com/ollama/ollama/format"
			
 
				 	"github.com/ollama/ollama/gpu"
			
 
				+	"github.com/ollama/ollama/runners"
			
 
				 )
			
 
				 
			
 
				 type LlamaServer interface {
			
@@ -106,7 +108,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
				 		gpus = gpu.GetCPUInfo()
			
 
				 	}
			
 
				 	if len(gpus) == 1 && gpus[0].Library == "cpu" {
			
 
				-		cpuRunner = serverForCpu()
			
 
				+		cpuRunner = runners.ServerForCpu()
			
 
				 		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
			
 
				 	} else {
			
 
				 		estimate = EstimateGPULayers(gpus, ggml, projectors, opts)
			
@@ -118,7 +120,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
				 			opts.NumGPU = 0
			
 
				 		case gpus[0].Library != "metal" && estimate.Layers == 0:
			
 
				 			// Don't bother loading into the GPU if no layers can fit
			
 
				-			cpuRunner = serverForCpu()
			
 
				+			cpuRunner = runners.ServerForCpu()
			
 
				 			gpus = gpu.GetCPUInfo()
			
 
				 		case opts.NumGPU < 0 && estimate.Layers > 0 && gpus[0].Library != "cpu":
			
 
				 			opts.NumGPU = estimate.Layers
			
@@ -145,25 +147,20 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
				 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
			
 
				 	}
			
 
				 
			
 
				-	availableServers := getAvailableServers()
			
 
				+	rDir, err := runners.Refresh(build.EmbedFS)
			
 
				+	if err != nil {
			
 
				+		return nil, err
			
 
				+	}
			
 
				+
			
 
				+	availableServers := runners.GetAvailableServers(rDir)
			
 
				 	if len(availableServers) == 0 {
			
 
				-		if runtime.GOOS != "windows" {
			
 
				-			slog.Warn("llama server binary disappeared, reinitializing payloads")
			
 
				-			err = Init()
			
 
				-			if err != nil {
			
 
				-				slog.Warn("failed to reinitialize payloads", "error", err)
			
 
				-				return nil, err
			
 
				-			}
			
 
				-			availableServers = getAvailableServers()
			
 
				-		} else {
			
 
				-			return nil, finalErr
			
 
				-		}
			
 
				+		return nil, finalErr
			
 
				 	}
			
 
				 	var servers []string
			
 
				 	if cpuRunner != "" {
			
 
				 		servers = []string{cpuRunner}
			
 
				 	} else {
			
 
				-		servers = serversForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
			
 
				+		servers = runners.ServersForGpu(gpus[0]) // All GPUs in the list are matching Library and Variant
			
 
				 	}
			
 
				 	demandLib := envconfig.LLMLibrary()
			
 
				 	if demandLib != "" {
			
@@ -330,7 +327,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 
				 		_, err := os.Stat(server)
			
 
				 		if errors.Is(err, os.ErrNotExist) {
			
 
				 			slog.Warn("llama server disappeared, reinitializing payloads", "path", server, "error", err)
			
 
				-			err = Init()
			
 
				+			_, err = runners.Refresh(build.EmbedFS)
			
 
				 			if err != nil {
			
 
				 				slog.Warn("failed to reinitialize payloads", "error", err)
			
 
				 				return nil, err
			
--- a/runners/common.go
+++ b/runners/common.go
@@ -0,0 +1,384 @@
 
				+package runners
			
 
				+
			
 
				+import (
			
 
				+	"compress/gzip"
			
 
				+	"errors"
			
 
				+	"fmt"
			
 
				+	"io"
			
 
				+	"io/fs"
			
 
				+	"log/slog"
			
 
				+	"os"
			
 
				+	"path/filepath"
			
 
				+	"runtime"
			
 
				+	"slices"
			
 
				+	"strconv"
			
 
				+	"strings"
			
 
				+	"sync"
			
 
				+	"syscall"
			
 
				+
			
 
				+	"golang.org/x/sync/errgroup"
			
 
				+
			
 
				+	"github.com/ollama/ollama/envconfig"
			
 
				+	"github.com/ollama/ollama/gpu"
			
 
				+)
			
 
				+
			
 
				+const (
			
 
				+	// binGlob matches embedded payload files laid out as $OS/$GOARCH/$RUNNER/$FILE.
			
 
				+	binGlob = "*/*/*/*"
			
 
				+)
			
 
				+
			
 
				+var (
			
 
				+	// lock serializes Refresh and Cleanup, which read and update runnersDir.
			
 
				+	lock       sync.Mutex
			
 
				+	// runnersDir is the directory holding the runners; it stays empty until
			
 
				+	// the first Refresh call resolves it.
			
 
				+	runnersDir = ""
			
 
				+)
			
 
				+
			
 
				+// Refresh returns the directory where the runners are stored.
				+//
				+// When the runners are carried as embedded payloads, the first call extracts
				+// them and later calls re-extract any file that has disappeared (for example
				+// due to tmp cleaners). Without payloads, the first call locates the runners
				+// on disk and later calls return the cached location.
				+func Refresh(payloadFS fs.FS) (string, error) {
				+	lock.Lock()
				+	defer lock.Unlock()
				+
				+	// On the very first load, report what was discovered once we are done.
				+	if firstLoad := runnersDir == ""; firstLoad {
				+		defer func() {
				+			var names []string
				+			for name := range GetAvailableServers(runnersDir) {
				+				names = append(names, name)
				+			}
				+			slog.Info("Dynamic LLM libraries", "runners", names)
				+			slog.Debug("Override detection logic by setting OLLAMA_LLM_LIBRARY")
				+		}()
				+	}
				+
				+	var err error
				+	switch embedded := hasPayloads(payloadFS); {
				+	case embedded && runnersDir == "":
				+		// First use of embedded payloads: unpack into a fresh tmp dir
				+		runnersDir, err = extractRunners(payloadFS)
				+	case embedded:
				+		// Already unpacked: restore anything that went missing
				+		err = refreshRunners(payloadFS, runnersDir)
				+	case runnersDir == "":
				+		// No payloads: the runners must already exist on disk
				+		runnersDir, err = locateRunners()
				+	}
				+
				+	return runnersDir, err
				+}
			
 
				+
			
 
				+// Cleanup removes the temporary directory that holds extracted runners.
				+// It does nothing when the runners are not embedded payloads or when no
				+// runner directory has been resolved yet. Removal failures are only logged.
				+func Cleanup(payloadFS fs.FS) {
				+	lock.Lock()
				+	defer lock.Unlock()
				+
				+	if !hasPayloads(payloadFS) || runnersDir == "" {
				+		return
				+	}
				+
				+	// The payloads live in a subdirectory of the tmpdir, so remove the
				+	// parent to fully clean up (filepath.Join already cleans the result)
				+	parent := filepath.Join(runnersDir, "..")
				+	slog.Debug("cleaning up", "dir", parent)
				+	if err := os.RemoveAll(parent); err != nil {
				+		slog.Warn("failed to clean up", "dir", parent, "err", err)
				+	}
				+}
			
 
				+
			
 
				+// locateRunners searches for an existing "lib/ollama/runners" directory and
				+// returns the first one found. The search roots are the executable's
				+// directory, the library directory relative to the executable, and the
				+// current working directory; each root is also tried with
				+// "$GOOS-$GOARCH" and "dist/$GOOS-$GOARCH" appended.
				+func locateRunners() (string, error) {
				+	exe, err := os.Executable()
				+	if err != nil {
				+		return "", err
				+	}
				+
				+	cwd, err := os.Getwd()
				+	if err != nil {
				+		return "", err
				+	}
				+
				+	exeDir := filepath.Dir(exe)
				+	platform := runtime.GOOS + "-" + runtime.GOARCH
				+	roots := []string{exeDir, filepath.Join(exeDir, envconfig.LibRelativeToExe()), cwd}
				+
				+	// Try a few variations to improve developer experience when building from source in the local tree
				+	var paths []string
				+	for _, root := range roots {
				+		paths = append(paths,
				+			root,
				+			filepath.Join(root, platform),
				+			filepath.Join(root, "dist", platform),
				+		)
				+	}
				+
				+	for _, base := range paths {
				+		candidate := filepath.Join(base, "lib", "ollama", "runners")
				+		if _, statErr := os.Stat(candidate); statErr != nil {
				+			continue
				+		}
				+		return candidate, nil
				+	}
				+	return "", fmt.Errorf("unable to locate runners in any search path %v", paths)
				+}
			
 
				+
			
 
				+// hasPayloads reports whether payloadFS carries nested payloads for the
				+// runners. A lone match whose path contains "placeholder" does not count.
				+func hasPayloads(payloadFS fs.FS) bool {
				+	matches, err := fs.Glob(payloadFS, binGlob)
				+	if err != nil {
				+		return false
				+	}
				+
				+	switch len(matches) {
				+	case 0:
				+		return false
				+	case 1:
				+		return !strings.Contains(matches[0], "placeholder")
				+	default:
				+		return true
				+	}
				+}
			
 
				+
			
 
				+// extractRunners creates a fresh temporary directory, records our pid in it,
				+// and extracts the embedded runners into its "runners" subdirectory.
				+// The runners path is returned even when the extraction itself fails; only a
				+// failure to create the temporary directory yields an empty path.
				+func extractRunners(payloadFS fs.FS) (string, error) {
				+	cleanupTmpDirs()
				+
				+	base, err := os.MkdirTemp(envconfig.TmpDir(), "ollama")
				+	if err != nil {
				+		return "", fmt.Errorf("failed to generate tmp dir: %w", err)
				+	}
				+
				+	// Track our pid so we can clean up orphaned tmpdirs
				+	pidFile := filepath.Join(base, "ollama.pid")
				+	pid := strconv.Itoa(os.Getpid())
				+	if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil {
				+		slog.Warn("failed to write pid file", "file", pidFile, "error", err)
				+	}
				+
				+	// We create a distinct subdirectory for payloads within the tmpdir
				+	// This will typically look like /tmp/ollama3208993108/runners on linux
				+	dest := filepath.Join(base, "runners")
				+	slog.Info("extracting embedded files", "dir", dest)
				+
				+	err = refreshRunners(payloadFS, dest)
				+	return dest, err
				+}
			
 
				+
			
 
				+// refreshRunners extracts the embedded runner files into rDir, or restores
				+// any that are missing from a previous extraction.
				+func refreshRunners(payloadFS fs.FS, rDir string) error {
				+	if err := extractFiles(payloadFS, rDir, binGlob); err != nil {
				+		return fmt.Errorf("extract binaries: %v", err)
				+	}
				+	return nil
				+}
			
 
				+
			
 
				+// extractFiles extracts the embedded files matching glob from payloadFS into
				+// targetDir, placing each file in a subdirectory named after its runner.
				+// Payloads ending in ".gz" are decompressed and written without the suffix.
				+// Files that already exist at the destination are left untouched, so calling
				+// this again only restores what is missing.
				+//
				+// If any file fails to extract, targetDir is removed (best effort) and the
				+// extraction error is returned.
				+func extractFiles(payloadFS fs.FS, targetDir string, glob string) error {
				+	files, err := fs.Glob(payloadFS, glob)
				+	if err != nil || len(files) == 0 {
				+		// Should not happen
				+		return fmt.Errorf("extractFiles called without payload present")
				+	}
				+
				+	if err := os.MkdirAll(targetDir, 0o755); err != nil {
				+		return fmt.Errorf("extractFiles could not mkdir %s: %v", targetDir, err)
				+	}
				+
				+	g := new(errgroup.Group)
				+
				+	// $OS/$GOARCH/$RUNNER/$FILE
				+	for _, file := range files {
				+		filename := file
				+
				+		runner := filepath.Base(filepath.Dir(filename))
				+
				+		slog.Debug("extracting", "runner", runner, "payload", filename)
				+
				+		g.Go(func() error {
				+			srcf, err := payloadFS.Open(filename)
				+			if err != nil {
				+				return err
				+			}
				+			defer srcf.Close()
				+
				+			src := io.Reader(srcf)
				+			if strings.HasSuffix(filename, ".gz") {
				+				src, err = gzip.NewReader(src)
				+				if err != nil {
				+					return fmt.Errorf("decompress payload %s: %v", filename, err)
				+				}
				+				filename = strings.TrimSuffix(filename, ".gz")
				+			}
				+
				+			runnerDir := filepath.Join(targetDir, runner)
				+			if err := os.MkdirAll(runnerDir, 0o755); err != nil {
				+				return fmt.Errorf("extractFiles could not mkdir %s: %v", runnerDir, err)
				+			}
				+
				+			base := filepath.Base(filename)
				+			destFilename := filepath.Join(runnerDir, base)
				+
				+			// Only write files that are missing; existing ones are kept as-is
				+			_, err = os.Stat(destFilename)
				+			switch {
				+			case errors.Is(err, os.ErrNotExist):
				+				destFile, err := os.OpenFile(destFilename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
				+				if err != nil {
				+					return fmt.Errorf("write payload %s: %v", filename, err)
				+				}
				+				defer destFile.Close()
				+				if _, err := io.Copy(destFile, src); err != nil {
				+					return fmt.Errorf("copy payload %s: %v", filename, err)
				+				}
				+			case err != nil:
				+				return fmt.Errorf("stat payload %s: %v", filename, err)
				+			}
				+			return nil
				+		})
				+	}
				+
				+	if err := g.Wait(); err != nil {
				+		slog.Error("failed to extract files", "error", err)
				+		// If we fail to extract, the payload dir is most likely unusable, so cleanup whatever we extracted.
				+		// The cleanup result gets its own variable so it cannot mask the extraction error returned below.
				+		if rmErr := os.RemoveAll(targetDir); rmErr != nil {
				+			slog.Warn("failed to cleanup incomplete payload dir", "dir", targetDir, "error", rmErr)
				+		}
				+		return err
				+	}
				+	return nil
				+}
			
 
				+
			
 
				+// cleanupTmpDirs makes a best effort to remove tmpdirs left behind by earlier
				+// runs. It looks for "ollama*/ollama.pid" files under the configured tmp dir
				+// (falling back to the OS default) and, when the recorded process is no
				+// longer running, removes the pid file, the "runners" subdirectory, and
				+// finally the tmpdir itself. Failures are logged and otherwise ignored.
				+func cleanupTmpDirs() {
				+	base := envconfig.TmpDir()
				+	if base == "" {
				+		base = os.TempDir()
				+	}
				+
				+	pidFiles, err := filepath.Glob(filepath.Join(base, "ollama*", "ollama.pid"))
				+	if err != nil {
				+		return
				+	}
				+
				+	for _, pidFile := range pidFiles {
				+		contents, err := os.ReadFile(pidFile)
				+		switch {
				+		case errors.Is(err, os.ErrNotExist):
				+			slog.Debug("not a ollama runtime directory, skipping", "path", pidFile)
				+			continue
				+		case err != nil:
				+			slog.Warn("could not read ollama.pid, skipping", "path", pidFile, "error", err)
				+			continue
				+		}
				+
				+		pid, err := strconv.Atoi(string(contents))
				+		if err != nil {
				+			slog.Warn("invalid pid, skipping", "path", pidFile, "error", err)
				+			continue
				+		}
				+
				+		// Probe the owner with signal 0; anything other than "process done"
				+		// is treated as still running
				+		proc, err := os.FindProcess(pid)
				+		if err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
				+			slog.Warn("process still running, skipping", "pid", pid, "path", pidFile)
				+			continue
				+		}
				+
				+		if err := os.Remove(pidFile); err != nil {
				+			slog.Warn("could not cleanup stale pidfile", "path", pidFile, "error", err)
				+		}
				+
				+		staleDir := filepath.Dir(pidFile)
				+
				+		staleRunners := filepath.Join(staleDir, "runners")
				+		if err := os.RemoveAll(staleRunners); err != nil {
				+			slog.Warn("could not cleanup stale runners", "path", staleRunners, "error", err)
				+		}
				+
				+		// Plain Remove: the tmpdir is only deleted if nothing else is left in it
				+		if err := os.Remove(staleDir); err != nil {
				+			slog.Warn("could not cleanup stale tmpdir", "path", staleDir, "error", err)
				+		}
				+	}
				+}
			
 
				+
			
 
				+// GetAvailableServers returns a map from runner name to that runner's
				+// directory, for every subdirectory of payloadsDir that contains a file
				+// whose name starts with "ollama_". It returns nil when payloadsDir is empty
				+// or the glob pattern is invalid.
				+//
				+// Directory names are the name of the runner and may contain an optional
				+// variant prefixed with '_' as the separator. For example, "cuda_v11" and
				+// "cuda_v12" or "cpu" and "cpu_avx2". Any library without a variant is the
				+// lowest common denominator.
				+func GetAvailableServers(payloadsDir string) map[string]string {
				+	if payloadsDir == "" {
				+		slog.Error("empty runner dir")
				+		return nil
				+	}
				+
				+	// glob payloadsDir for files that start with ollama_
				+	pattern := filepath.Join(payloadsDir, "*", "ollama_*")
				+	matches, err := filepath.Glob(pattern)
				+	if err != nil {
				+		slog.Debug("could not glob", "pattern", pattern, "error", err)
				+		return nil
				+	}
				+
				+	servers := make(map[string]string)
				+	for _, match := range matches {
				+		slog.Debug("availableServers : found", "file", match)
				+		dir := filepath.Dir(match)
				+		servers[filepath.Base(dir)] = dir
				+	}
				+
				+	return servers
				+}
			
 
				+
			
 
				+// ServersForGpu returns the names of the runners compatible with the
				+// provided GPU info, in the order they should be tried: the exact
				+// library/variant match, then other variants of the same library (sorted),
				+// then a CPU runner as the last resort. It reads the package-level runner
				+// directory, so no runners are found if that has not been resolved yet.
				+// TODO - switch to metadata based mapping
				+func ServersForGpu(info gpu.GpuInfo) []string {
				+	available := GetAvailableServers(runnersDir)
				+
				+	requested := info.Library
				+	if info.Variant != gpu.CPUCapabilityNone.String() {
				+		requested += "_" + info.Variant
				+	}
				+
				+	servers := []string{}
				+
				+	// exact match first; metal is returned on its own
				+	if _, ok := available[requested]; ok {
				+		servers = []string{requested}
				+		if requested == "metal" {
				+			return servers
				+		}
				+	}
				+
				+	// Then for GPUs load alternates and sort the list for consistent load ordering
				+	if info.Library != "cpu" {
				+		alt := []string{}
				+		for name := range available {
				+			if name == requested {
				+				continue
				+			}
				+			if library, _, _ := strings.Cut(name, "_"); library == info.Library {
				+				alt = append(alt, name)
				+			}
				+		}
				+
				+		slices.Sort(alt)
				+		servers = append(servers, alt...)
				+	}
				+
				+	// No CPU fallback is added on Apple Silicon
				+	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
				+		return servers
				+	}
				+
				+	// Load up the best CPU variant if not primary requested
				+	if info.Library != "cpu" {
				+		variant := gpu.GetCPUCapability()
				+		if variant == gpu.CPUCapabilityNone {
				+			// If no variant, then we fall back to default
				+			servers = append(servers, "cpu")
				+		} else {
				+			// If we have a variant, try that if we find an exact match
				+			// Attempting to run the wrong CPU instructions will panic the
				+			// process
				+			cpuRunner := "cpu_" + variant.String()
				+			if _, ok := available[cpuRunner]; ok {
				+				servers = append(servers, cpuRunner)
				+			}
				+		}
				+	}
				+
				+	if len(servers) == 0 {
				+		servers = []string{"cpu"}
				+	}
				+
				+	return servers
				+}
			
 
				+
			
 
				+// ServerForCpu returns the name of the optimal runner for this CPU
				+// architecture: "metal" on darwin/arm64, otherwise the "cpu_<variant>"
				+// runner matching the detected CPU capability when it is available, and
				+// plain "cpu" in every other case.
				+func ServerForCpu() string {
				+	if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
				+		return "metal"
				+	}
				+
				+	variant := gpu.GetCPUCapability()
				+	available := GetAvailableServers(runnersDir)
				+	if variant == gpu.CPUCapabilityNone {
				+		return "cpu"
				+	}
				+
				+	candidate := "cpu_" + variant.String()
				+	if _, ok := available[candidate]; ok {
				+		return candidate
				+	}
				+	return "cpu"
				+}
			
--- a/runners/runners_test.go
+++ b/runners/runners_test.go
@@ -0,0 +1,50 @@
 
				+package runners
			
 
				+
			
 
				+import (
			
 
				+	"log/slog"
			
 
				+	"os"
			
 
				+	"path"
			
 
				+	"runtime"
			
 
				+	"strings"
			
 
				+	"testing"
			
 
				+	"testing/fstest"
			
 
				+)
			
 
				+
			
 
				+// TestRefreshRunners extracts a fake single-runner payload through Refresh,
				+// checks that the runner lands inside the configured tmp dir and is
				+// discoverable, then extracts a second time and runs the cleanup paths.
				+func TestRefreshRunners(t *testing.T) {
				+	slog.SetLogLoggerLevel(slog.LevelDebug)
				+
				+	payloadFS := fstest.MapFS{
				+		path.Join(runtime.GOOS, runtime.GOARCH, "foo", "ollama_llama_server"): {Data: []byte("hello, world\n")},
				+	}
				+	tmpDir, err := os.MkdirTemp("", "testing")
				+	if err != nil {
				+		t.Fatalf("failed to make tmp dir %s", err)
				+	}
				+	// Remove the scratch dir when the test ends. The second extraction below
				+	// creates another tmpdir inside it that cleanupTmpDirs skips (its pid file
				+	// names this still-running process), so it would otherwise be left behind.
				+	t.Cleanup(func() {
				+		if err := os.RemoveAll(tmpDir); err != nil {
				+			t.Logf("failed to remove tmp dir %s: %s", tmpDir, err)
				+		}
				+	})
				+	t.Setenv("OLLAMA_TMPDIR", tmpDir)
				+	rDir, err := Refresh(payloadFS)
				+	if err != nil {
				+		t.Fatalf("failed to extract to %s %s", tmpDir, err)
				+	}
				+	if !strings.Contains(rDir, tmpDir) {
				+		t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir)
				+	}
				+
				+	// spot check results
				+	servers := GetAvailableServers(rDir)
				+	if len(servers) < 1 {
				+		t.Fatalf("expected at least 1 server")
				+	}
				+
				+	// Extract again into a fresh tmpdir
				+	rDir, err = extractRunners(payloadFS)
				+	if err != nil {
				+		t.Fatalf("failed to extract to %s %s", tmpDir, err)
				+	}
				+	if !strings.Contains(rDir, tmpDir) {
				+		t.Fatalf("runner dir %s was not in tmp dir %s", rDir, tmpDir)
				+	}
				+
				+	cleanupTmpDirs()
				+
				+	Cleanup(payloadFS)
				+}
			
--- a/scripts/build_darwin.sh
+++ b/scripts/build_darwin.sh
@@ -2,8 +2,7 @@
 
				 
			
 
				 set -e
			
 
				 
			
 
				-export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
			
 
				-export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
			
 
				+. $(dirname $0)/env.sh
			
 
				 
			
 
				 mkdir -p dist
			
 
				 
			
--- a/scripts/build_docker.sh
+++ b/scripts/build_docker.sh
@@ -2,76 +2,34 @@
 
				 
			
 
				 set -eu
			
 
				 
			
 
				-export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
			
 
				-export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
			
 
				-
			
 
				-# We use 2 different image repositories to handle combining architecture images into multiarch manifest
			
 
				-# (The ROCm image is x86 only and is not a multiarch manifest)
			
 
				-# For developers, you can override the DOCKER_ORG to generate multiarch manifests
			
 
				-#  DOCKER_ORG=jdoe PUSH=1 ./scripts/build_docker.sh
			
 
				-DOCKER_ORG=${DOCKER_ORG:-"ollama"}
			
 
				-RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
			
 
				-FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
			
 
				-
			
 
				-BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
			
 
				+. $(dirname $0)/env.sh
			
 
				 
			
 
				 # Set PUSH to a non-empty string to trigger push instead of load
			
 
				 PUSH=${PUSH:-""}
			
 
				 
			
 
				-# In CI mode, we break things down
			
 
				-OLLAMA_SKIP_MANIFEST_CREATE=${OLLAMA_SKIP_MANIFEST_CREATE:-""}
			
 
				-OLLAMA_SKIP_IMAGE_BUILD=${OLLAMA_SKIP_IMAGE_BUILD:-""}
			
 
				-
			
 
				 if [ -z "${PUSH}" ] ; then
			
 
				+    echo "Building ${FINAL_IMAGE_REPO}:$VERSION locally.  set PUSH=1 to push"
			
 
				     LOAD_OR_PUSH="--load"
			
 
				 else
			
 
				-    echo "Will be pushing ${RELEASE_IMAGE_REPO}:$VERSION for ${BUILD_ARCH}"
			
 
				+    echo "Will be pushing ${FINAL_IMAGE_REPO}:$VERSION"
			
 
				     LOAD_OR_PUSH="--push"
			
 
				 fi
			
 
				 
			
 
				-if [ -z "${OLLAMA_SKIP_IMAGE_BUILD}" ]; then
			
 
				-    for TARGETARCH in ${BUILD_ARCH}; do
			
 
				-        docker build \
			
 
				-            ${LOAD_OR_PUSH} \
			
 
				-            --platform=linux/${TARGETARCH} \
			
 
				-            --build-arg=VERSION \
			
 
				-            --build-arg=GOFLAGS \
			
 
				-            -f Dockerfile \
			
 
				-            -t ${RELEASE_IMAGE_REPO}:$VERSION-${TARGETARCH} \
			
 
				-            .
			
 
				-    done
			
 
				-
			
 
				-    if echo ${BUILD_ARCH} | grep "amd64" > /dev/null; then
			
 
				-        docker build \
			
 
				-            ${LOAD_OR_PUSH} \
			
 
				-            --platform=linux/amd64 \
			
 
				-            --build-arg=VERSION \
			
 
				-            --build-arg=GOFLAGS \
			
 
				-            --target runtime-rocm \
			
 
				-            -f Dockerfile \
			
 
				-            -t ${RELEASE_IMAGE_REPO}:$VERSION-rocm \
			
 
				-            .
			
 
				-    fi
			
 
				-fi
			
 
				-
			
 
				-if [ -z "${OLLAMA_SKIP_MANIFEST_CREATE}" ]; then
			
 
				-    if [ -n "${PUSH}" ]; then
			
 
				-        docker manifest create ${FINAL_IMAGE_REPO}:$VERSION \
			
 
				-            ${RELEASE_IMAGE_REPO}:$VERSION-amd64 \
			
 
				-            ${RELEASE_IMAGE_REPO}:$VERSION-arm64
			
 
				-        docker manifest push ${FINAL_IMAGE_REPO}:$VERSION
			
 
				-
			
 
				-        # For symmetry, tag/push the rocm image
			
 
				-        if [ "${RELEASE_IMAGE_REPO}" != "${FINAL_IMAGE_REPO}" ]; then
			
 
				-            echo "Tagging and pushing rocm image"
			
 
				-            docker pull ${RELEASE_IMAGE_REPO}:$VERSION-rocm
			
 
				-            docker tag ${RELEASE_IMAGE_REPO}:$VERSION-rocm ${FINAL_IMAGE_REPO}:$VERSION-rocm
			
 
				-            docker push ${FINAL_IMAGE_REPO}:$VERSION-rocm
			
 
				-        fi
			
 
				-    else
			
 
				-        echo "Skipping manifest generation when not pushing images are available locally as "
			
 
				-        echo "  ${RELEASE_IMAGE_REPO}:$VERSION-amd64"
			
 
				-        echo "  ${RELEASE_IMAGE_REPO}:$VERSION-arm64"
			
 
				-        echo "  ${RELEASE_IMAGE_REPO}:$VERSION-rocm"
			
 
				-    fi
			
 
				-fi
			
 
				+docker buildx build \
			
 
				+    ${LOAD_OR_PUSH} \
			
 
				+    --platform=${PLATFORM} \
			
 
				+    ${OLLAMA_COMMON_BUILD_ARGS} \
			
 
				+    -f Dockerfile \
			
 
				+    -t ${FINAL_IMAGE_REPO}:$VERSION \
			
 
				+    .
			
 
				+
			
 
				+if echo $PLATFORM | grep "amd64" > /dev/null; then
			
 
				+    docker buildx build \
			
 
				+        ${LOAD_OR_PUSH} \
			
 
				+        --platform=linux/amd64 \
			
 
				+        ${OLLAMA_COMMON_BUILD_ARGS} \
			
 
				+        --target runtime-rocm \
			
 
				+        -f Dockerfile \
			
 
				+        -t ${FINAL_IMAGE_REPO}:$VERSION-rocm \
			
 
				+        .
			
 
				+fi
			
--- a/scripts/build_linux.sh
+++ b/scripts/build_linux.sh
@@ -1,37 +1,29 @@
 
				 #!/bin/sh
			
 
				+#
			
 
				+# Mac ARM users: Rosetta can be flaky, so to use a remote x86 builder instead:
			
 
				+#
			
 
				+# docker context create amd64 --docker host=ssh://mybuildhost
			
 
				+# docker buildx create --name mybuilder amd64 --platform linux/amd64
			
 
				+# docker buildx create --name mybuilder --append desktop-linux --platform linux/arm64
			
 
				+# docker buildx use mybuilder
			
 
				+
			
 
				 
			
 
				 set -eu
			
 
				 
			
 
				-export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
			
 
				-export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
			
 
				-GZIP=$(which pigz 2>/dev/null || echo "gzip")
			
 
				+. $(dirname $0)/env.sh
			
 
				 
			
 
				-BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
			
 
				-export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
			
 
				 mkdir -p dist
			
 
				 
			
 
				-for TARGETARCH in ${BUILD_ARCH}; do
			
 
				-    docker build \
			
 
				-        --platform=linux/$TARGETARCH \
			
 
				-        --build-arg=GOFLAGS \
			
 
				-        --build-arg=CGO_CFLAGS \
			
 
				-        --build-arg=OLLAMA_CUSTOM_CPU_DEFS \
			
 
				-        --build-arg=AMDGPU_TARGETS \
			
 
				-        --target build-$TARGETARCH \
			
 
				+docker buildx build \
			
 
				+        --output type=local,dest=./dist/ \
			
 
				+        --platform=${PLATFORM} \
			
 
				+        ${OLLAMA_COMMON_BUILD_ARGS} \
			
 
				+        --target dist \
			
 
				         -f Dockerfile \
			
 
				-        -t builder:$TARGETARCH \
			
 
				         .
			
 
				-    docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
			
 
				-    rm -rf ./dist/linux-$TARGETARCH
			
 
				-    docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH ./dist
			
 
				-    if echo ${TARGETARCH} | grep "amd64" > /dev/null; then
			
 
				-        docker cp builder-$TARGETARCH:/go/src/github.com/ollama/ollama/dist/linux-$TARGETARCH-rocm ./dist
			
 
				-    fi
			
 
				-    docker rm builder-$TARGETARCH
			
 
				-    echo "Compressing final linux bundle..."
			
 
				-    rm -f ./dist/ollama-linux-$TARGETARCH.tgz
			
 
				-    (cd dist/linux-$TARGETARCH && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH.tgz )
			
 
				-    if [ -d dist/linux-$TARGETARCH-rocm ]; then
			
 
				-        (cd dist/linux-$TARGETARCH-rocm && tar cf - . | ${GZIP} --best > ../ollama-linux-$TARGETARCH-rocm.tgz )
			
 
				-    fi
			
 
				-done
			
 
				+
			
 
				+# buildx behavior changes for single vs. multiplatform
			
 
				+if echo $PLATFORM | grep "," > /dev/null ; then 
			
 
				+        mv -f ./dist/linux_*64/ollama* ./dist/
			
 
				+        rmdir ./dist/linux_*64
			
 
				+fi
			
--- a/scripts/env.sh
+++ b/scripts/env.sh
@@ -0,0 +1,14 @@
 
				+# Common environment setup across build*.sh scripts
			
 
				+
			
 
				+# VERSION keeps any value already set; otherwise it is derived from
			
 
				+# `git describe`, with a leading "v" stripped.
			
 
				+export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
			
 
				+# GOFLAGS carries the linker flags, including the -X settings that stamp
			
 
				+# version.Version with $VERSION and set server.mode to release.
			
 
				+export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
			
 
				+# TODO - consider `docker buildx ls --format=json` to autodiscover platform capability
			
 
				+# Each setting below keeps a value already provided by the caller and
			
 
				+# otherwise falls back to the default shown.
			
 
				+PLATFORM=${PLATFORM:-"linux/arm64,linux/amd64"}
			
 
				+DOCKER_ORG=${DOCKER_ORG:-"ollama"}
			
 
				+RELEASE_IMAGE_REPO=${RELEASE_IMAGE_REPO:-"${DOCKER_ORG}/release"}
			
 
				+FINAL_IMAGE_REPO=${FINAL_IMAGE_REPO:-"${DOCKER_ORG}/ollama"}
			
 
				+# --build-arg flags shared by the docker buildx invocations in the build scripts.
			
 
				+OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION --build-arg=GOFLAGS --build-arg=OLLAMA_CUSTOM_CPU_DEFS --build-arg=AMDGPU_TARGETS"
			
 
				+
			
 
				+# Print a summary of the resolved settings.
			
 
				+echo "Building Ollama"
			
 
				+echo "VERSION=$VERSION"
			
 
				+echo "PLATFORM=$PLATFORM"
			
--- a/server/routes.go
+++ b/server/routes.go
@@ -26,11 +26,13 @@ import (
 
				 	"golang.org/x/sync/errgroup"
			
 
				 
			
 
				 	"github.com/ollama/ollama/api"
			
 
				+	"github.com/ollama/ollama/build"
			
 
				 	"github.com/ollama/ollama/envconfig"
			
 
				 	"github.com/ollama/ollama/gpu"
			
 
				 	"github.com/ollama/ollama/llm"
			
 
				 	"github.com/ollama/ollama/openai"
			
 
				 	"github.com/ollama/ollama/parser"
			
 
				+	"github.com/ollama/ollama/runners"
			
 
				 	"github.com/ollama/ollama/template"
			
 
				 	"github.com/ollama/ollama/types/errtypes"
			
 
				 	"github.com/ollama/ollama/types/model"
			
@@ -1216,12 +1218,12 @@ func Serve(ln net.Listener) error {
 
				 		srvr.Close()
			
 
				 		schedDone()
			
 
				 		sched.unloadAllRunners()
			
 
				-		gpu.Cleanup()
			
 
				+		runners.Cleanup(build.EmbedFS)
			
 
				 		done()
			
 
				 	}()
			
 
				 
			
 
				-	if err := llm.Init(); err != nil {
			
 
				-		return fmt.Errorf("unable to initialize llm library %w", err)
			
 
				+	if _, err := runners.Refresh(build.EmbedFS); err != nil {
			
 
				+		return fmt.Errorf("unable to initialize llm runners %w", err)
			
 
				 	}
			
 
				 
			
 
				 	s.sched.Run(schedCtx)
		`@@ -0,0 +1 @@`
		`+This is here to make sure the build/ directory exists for the go:embed command`