Michael Yang 5 months ago
commit 144f63e2fb
100 changed files with 451 additions and 3764 deletions
  1. +9 -0  .gitattributes
  2. +49 -215  .github/workflows/release.yaml
  3. +29 -278  .github/workflows/test.yaml
  4. +54 -0  CMakeLists.txt
  5. +109 -0  CMakePresets.json
  6. +150 -190  Dockerfile
  7. +0 -103  Makefile
  8. +46 -0  Makefile2
  9. +2 -1  go.mod
  10. +2 -0  go.sum
  11. +1 -2  llama/README.md
  12. +0 -34  llama/amx.h
  13. +0 -51  llama/ggml-blas.h
  14. +0 -34  llama/ggml-cpu-aarch64.h
  15. +0 -64  llama/ggml-cpu-traits.h
  16. +0 -31  llama/ggml-cuda/acc.cuh
  17. +0 -60  llama/ggml-cuda/arange.cu
  18. +0 -31  llama/ggml-cuda/arange.cuh
  19. +0 -29  llama/ggml-cuda/argmax.cuh
  20. +0 -29  llama/ggml-cuda/argsort.cuh
  21. +0 -35  llama/ggml-cuda/binbcast.cuh
  22. +0 -60  llama/ggml-cuda/clamp.cu
  23. +0 -31  llama/ggml-cuda/clamp.cuh
  24. +0 -31  llama/ggml-cuda/concat.cuh
  25. +0 -31  llama/ggml-cuda/conv-transpose-1d.cuh
  26. +0 -39  llama/ggml-cuda/convert.cuh
  27. +0 -31  llama/ggml-cuda/count-equal.cuh
  28. +0 -35  llama/ggml-cuda/cpy.cuh
  29. +0 -33  llama/ggml-cuda/cross-entropy-loss.cuh
  30. +0 -31  llama/ggml-cuda/diagmask.cuh
  31. +0 -29  llama/ggml-cuda/fattn-tile-f16.cuh
  32. +0 -29  llama/ggml-cuda/fattn-tile-f32.cuh
  33. +0 -29  llama/ggml-cuda/fattn.cuh
  34. +0 -31  llama/ggml-cuda/getrows.cuh
  35. +0 -31  llama/ggml-cuda/im2col.cuh
  36. +0 -38  llama/ggml-cuda/mmv.cuh
  37. +0 -35  llama/ggml-cuda/mmvq.cuh
  38. +0 -33  llama/ggml-cuda/norm.cuh
  39. +0 -31  llama/ggml-cuda/opt-step-adamw.cuh
  40. +0 -29  llama/ggml-cuda/out-prod.cuh
  41. +0 -32  llama/ggml-cuda/pad.cuh
  42. +0 -31  llama/ggml-cuda/pool2d.cuh
  43. +0 -50  llama/ggml-cuda/quantize.cuh
  44. +0 -31  llama/ggml-cuda/rope.cuh
  45. +0 -57  llama/ggml-cuda/scale.cu
  46. +0 -31  llama/ggml-cuda/scale.cuh
  47. +0 -31  llama/ggml-cuda/softmax.cuh
  48. +0 -31  llama/ggml-cuda/sum.cuh
  49. +0 -65  llama/ggml-cuda/sumrows.cu
  50. +0 -31  llama/ggml-cuda/sumrows.cuh
  51. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
  52. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
  53. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
  54. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
  55. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
  56. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
  57. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
  58. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
  59. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
  60. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
  61. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
  62. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
  63. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
  64. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
  65. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
  66. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
  67. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
  68. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
  69. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
  70. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
  71. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
  72. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
  73. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
  74. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
  75. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
  76. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
  77. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
  78. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
  79. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
  80. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
  81. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
  82. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
  83. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
  84. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
  85. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
  86. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
  87. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
  88. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
  89. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
  90. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
  91. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
  92. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
  93. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
  94. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
  95. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
  96. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
  97. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
  98. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
  99. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
  100. +0 -31  llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu

+ 9 - 0
.gitattributes

@@ -7,5 +7,14 @@ llama/**/*.cuh linguist-vendored
 llama/**/*.m linguist-vendored
 llama/**/*.metal linguist-vendored
 
+ml/backend/**/*.c linguist-vendored
+ml/backend/**/*.h linguist-vendored
+ml/backend/**/*.cpp linguist-vendored
+ml/backend/**/*.hpp linguist-vendored
+ml/backend/**/*.cu linguist-vendored
+ml/backend/**/*.cuh linguist-vendored
+ml/backend/**/*.m linguist-vendored
+ml/backend/**/*.metal linguist-vendored
+
 * text=auto
 *.go text eol=lf
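
The new patterns mark the imported `ml/backend` sources as vendored for GitHub's language statistics and diff rendering. A quick local check (a sketch; the path is illustrative, any file under `ml/backend` works):

```shell
# Confirm the attribute applies to a vendored source file.
git check-attr linguist-vendored -- ml/backend/ggml/ggml/src/ggml.c
# expected output:
# ml/backend/ggml/ggml/src/ggml.c: linguist-vendored: set
```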

+ 49 - 215
.github/workflows/release.yaml

@@ -478,243 +478,77 @@ jobs:
             dist/OllamaSetup.exe
             dist/ollama-windows-*.zip
 
-  # Linux x86 assets built using the container based build
-  build-linux-amd64:
+  build-linux:
     environment: release
     runs-on: linux
-    env:
-      PLATFORM: linux/amd64
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - run: |
-          ./scripts/build_linux.sh
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-linux-amd64
-          path: |
-            dist/*linux*
-            !dist/*-cov
-
-  # Linux ARM assets built using the container based build
-  # (at present, docker isn't pre-installed on arm ubunutu images)
-  build-linux-arm64:
-    environment: release
-    runs-on: linux-arm64
-    env:
-      PLATFORM: linux/arm64
+    strategy:
+      matrix:
+        include:
+          - os: linux
+            arch: amd64
+            targets: [archive, rocm]
+          - os: linux
+            arch: arm64
+            targets: [archive]
     steps:
       - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Set Version
-        shell: bash
-        run: echo "VERSION=${GITHUB_REF_NAME#v}" >> $GITHUB_ENV
-      - name: 'Install Docker'
-        run: |
-          # Add Docker's official GPG key:
-          env
-          uname -a
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-
-          # Add the repository to Apt sources:
-          echo \
-            "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
       - run: |
-          ./scripts/build_linux.sh
+          apt-get update && apt-get install -y pigz
+          for TARGET in ${{ matrix.targets }}; do docker buildx build --platform $PLATFORM --target $TARGET --output type=local,dest=dist/$PLATFORM .; done
+          tar c -C dist/$PLATFORM . | pigz -9cv >dist/ollama-${PLATFORM//\//-}.tar.gz
+        env:
+          PLATFORM: ${{ matrix.os }}/${{ matrix.arch }}
       - uses: actions/upload-artifact@v4
         with:
-          name: dist-linux-arm64
+          name: dist-${{ matrix.os }}-${{ matrix.arch }}
           path: |
-            dist/*linux*
-            !dist/*-cov
+            dist/ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.gz
 
-  # Container image build
-  build-container-image:
+  build-docker:
     environment: release
+    runs-on: linux
     strategy:
       matrix:
-        runner:
-          - linux
-          - linux-arm64
-    runs-on: ${{ matrix.runner }}
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: 'Install Docker'
-        if: ${{ startsWith(matrix.runner, 'linux-arm64') }}
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y ca-certificates curl
-          sudo install -m 0755 -d /etc/apt/keyrings
-          sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
-          sudo chmod a+r /etc/apt/keyrings/docker.asc
-          echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
-            $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
-            sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-          sudo apt-get update
-          sudo apt-get install -y docker-ce docker-ce-cli containerd.io
-          sudo usermod -aG docker $USER
-          sudo apt-get install acl
-          sudo setfacl --modify user:$USER:rw /var/run/docker.sock
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          platforms: linux/${{ env.ARCH }}
-          build-args: |
-            GOFLAGS
-          outputs: type=image,name=${{ env.FINAL_IMAGE_REPO }},push-by-digest=true,name-canonical=true,push=true
-      - name: Export digest
-        run: |
-          mkdir -p /tmp/digests
-          digest="${{ steps.build.outputs.digest }}"
-          touch "/tmp/digests/${digest#sha256:}"
-      - name: Upload digest
-        uses: actions/upload-artifact@v4
-        with:
-          name: digests-${{ env.PLATFORM_PAIR }}
-          path: /tmp/digests/*
-          if-no-files-found: error
-          retention-days: 1
-  merge:
-    environment: release
-    runs-on: linux
-    needs:
-      - build-container-image
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
+        include:
+          - flavor: |
+              latest=auto
+            platforms: linux/amd64,linux/arm64
+            build-args: [GOFLAGS]
+          - flavor: |
+              suffix=-rocm,onlatest=false
+            platforms: linux/amd64
+            build-args: [GOFLAGS, FLAVOR=rocm]
     steps:
       - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          path: /tmp/digests
-          pattern: digests-*
-          merge-multiple: true
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
-          tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
-            type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          machine=$(uname -m)
-          case ${machine} in
-            x86_64) echo ARCH=amd64; echo PLATFORM_PAIR=linux-amd64 ;;
-            aarch64) echo ARCH=arm64; echo PLATFORM_PAIR=linux-arm64 ;;
-          esac >>$GITHUB_ENV
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+      - uses: docker/setup-qemu-action@v2
+      - uses: docker/setup-buildx-action@v2
+      - uses: docker/login-action@v3
         with:
           username: ${{ vars.DOCKER_USER }}
           password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
-            $(printf '${{ env.FINAL_IMAGE_REPO }}@sha256:%s ' *)
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect ${{ env.FINAL_IMAGE_REPO }}:${{ steps.meta.outputs.version }}          
-  build-container-image-rocm:
-    environment: release
-    runs-on: linux
-    env:
-      FINAL_IMAGE_REPO: ollama/ollama
-      ARCH: amd64
-      PLATFORM_PAIR: linux-amd64
-    steps:
-      - uses: actions/checkout@v4
+      - id: metadata
+        uses: docker/metadata-action@v4
         with:
-          submodules: recursive
-      - name: Docker meta
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.FINAL_IMAGE_REPO }}
-          flavor: |
-            latest=false
+          flavor: ${{ matrix.flavor }}
+          images: |
+            ollama/ollama
           tags: |
-            type=ref,enable=true,priority=600,prefix=0.0.0-pr,suffix=,event=pr
             type=semver,pattern={{version}}
-      - name: Set Version
-        shell: bash
-        run: |
-          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ env.DOCKER_METADATA_OUTPUT_VERSION }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_ENV
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
+      - uses: docker/build-push-action@v6
         with:
-          username: ${{ vars.DOCKER_USER }}
-          password: ${{ secrets.DOCKER_ACCESS_TOKEN }}
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: "."
-          target: runtime-rocm
-          build-args: |
-            GOFLAGS
-          tags: ${{ env.FINAL_IMAGE_REPO }}:${{ env.DOCKER_METADATA_OUTPUT_VERSION}}-rocm
+          context: .
           push: true
+          platforms: ${{ matrix.platforms }}
+          build-args: ${{ matrix.build-args }}
+          tags: ${{ steps.metadata.outputs.tags }}
+          labels: ${{ steps.metadata.outputs.labels }}
+          cache-from: type=registry,ref=ollama/ollama:latest
+          cache-to: type=inline
+          provenance: false
+        env:
+          GOFLAGS: "'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${{ steps.metadata.outputs.version }}\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
 
   # Aggregate all the assets and ship a release
   release:
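
The rewritten `build-linux` job drives everything through Dockerfile targets. It can be approximated locally (a sketch, assuming Docker with buildx and pigz installed; `archive` and `rocm` are targets defined in the new Dockerfile below):

```shell
# Build the linux/amd64 archive target and package it the way CI does.
PLATFORM=linux/amd64
docker buildx build --platform $PLATFORM --target archive \
    --output type=local,dest=dist/$PLATFORM .
tar c -C dist/$PLATFORM . | pigz -9cv > dist/ollama-${PLATFORM//\//-}.tar.gz
```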

+ 29 - 278
.github/workflows/test.yaml

@@ -1,11 +1,5 @@
 name: test
 
-env:
-  ROCM_WINDOWS_URL: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe
-  MSYS2_URL: https://github.com/msys2/msys2-installer/releases/download/2024-07-27/msys2-x86_64-20240727.exe
-  CUDA_12_WINDOWS_URL: https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe
-  CUDA_12_WINDOWS_VER: 12.4
-
 concurrency:
   # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
   # cancels running CI jobs and starts all new ones.
@@ -27,7 +21,7 @@ jobs:
   changes:
     runs-on: ubuntu-latest
     outputs:
-      RUNNERS: ${{ steps.changes.outputs.RUNNERS }}
+      changed: ${{ steps.changes.outputs.changed }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -35,309 +29,66 @@ jobs:
       - id: changes
         run: |
           changed() {
-            git diff-tree -r --no-commit-id --name-only \
-              $(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
-              ${{ github.event.pull_request.head.sha }} \
+            local BASE=${{ github.event.pull_request.base.sha }}
+            local HEAD=${{ github.event.pull_request.head.sha }}
+            local MERGE_BASE=$(git merge-base $BASE $HEAD)
+            git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
               | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
           }
 
-          {
-            echo RUNNERS=$(changed 'llama/**')
-          } >>$GITHUB_OUTPUT
+          echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
 
-  runners-linux-cuda:
+  linux:
     needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+    if: ${{ needs.changes.outputs.changed == 'True' }}
     strategy:
       matrix:
-        cuda-version:
-          - '11.8.0'
-    runs-on: linux
-    container: nvidia/cuda:${{ matrix.cuda-version }}-devel-ubuntu20.04
+        include:
+          - container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+            preset: CUDA
+          - container: rocm/dev-ubuntu-22.04:6.1.2
+            preset: ROCm
+            extra-packages: rocm-libs
+    runs-on: ubuntu-latest
+    container: ${{ matrix.container }}
     steps:
-      - run: |
-          apt-get update && apt-get install -y git build-essential curl
-        env:
-          DEBIAN_FRONTEND: noninteractive
       - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: go get ./...
-      - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-          make -j $cores cuda_v11
-  runners-linux-rocm:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    strategy:
-      matrix:
-        rocm-version:
-          - '6.1.2'
-    runs-on: linux
-    container: rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}
-    steps:
       - run: |
-          apt-get update && apt-get install -y git build-essential curl rocm-libs
+          apt-get update
+          apt-get install -y cmake pkg-config ccache ${{ matrix.extra-packages }}
+          ccache -o cache_dir=${{ github.workspace }}/.ccache
         env:
           DEBIAN_FRONTEND: noninteractive
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v4
+      - uses: actions/cache@v4
         with:
-          go-version-file: go.mod
-          cache: true
-      - run: go get ./...
+          path: ${{ github.workspace }}/.ccache
+          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
       - run: |
-          git config --global --add safe.directory /__w/ollama/ollama
-          cores=$(grep '^core id' /proc/cpuinfo |sort -u|wc -l)
-          make -j $cores rocm
-
-  # ROCm generation step
-  runners-windows-rocm:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: windows
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      # ROCM installation steps
-      - name: 'Cache ROCm installer'
-        id: cache-rocm
-        uses: actions/cache@v4
-        with:
-          path: rocm-install.exe
-          key: ${{ env.ROCM_WINDOWS_URL }}
-      - name: 'Conditionally Download ROCm'
-        if: steps.cache-rocm.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:ROCM_WINDOWS_URL}" -OutFile "rocm-install.exe"
-      - name: 'Install ROCm'
-        run: |
-          Start-Process "rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
-      - name: 'Verify ROCm'
-        run: |
-          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
-          echo "HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path | select -first 1)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-
-      - name: make rocm runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -C llama print-HIP_PATH print-HIP_LIB_DIR
-          make rocm
-
-  # CUDA generation step
-  runners-windows-cuda:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
-    runs-on: windows
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - name: Set make jobs default
-        run: |
-          echo "MAKEFLAGS=--jobs=$((Get-ComputerInfo -Property CsProcessors).CsProcessors.NumberOfCores)" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      # CUDA installation steps
-      - name: 'Cache CUDA installer'
-        id: cache-cuda
-        uses: actions/cache@v4
-        with:
-          path: cuda-install.exe
-          key: ${{ env.CUDA_12_WINDOWS_URL }}
-      - name: 'Conditionally Download CUDA'
-        if: steps.cache-cuda.outputs.cache-hit != 'true'
-        run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "${env:CUDA_12_WINDOWS_URL}" -OutFile "cuda-install.exe"
-      - name: 'Install CUDA'
-        run: |
-          $subpackages = @("cudart", "nvcc", "cublas", "cublas_dev") | foreach-object {"${_}_${{ env.CUDA_12_WINDOWS_VER }}"}
-          Start-Process "cuda-install.exe" -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
-      - name: 'Verify CUDA'
-        run: |
-          & (resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0] --version
-          $cudaPath=((resolve-path "c:\Program Files\NVIDIA*\CUDA\v*\bin\nvcc.exe")[0].path | split-path | split-path)
-          $cudaVer=($cudaPath | split-path -leaf ) -replace 'v(\d+).(\d+)', '$1_$2' 
-          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "CUDA_PATH=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_V${cudaVer}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-          echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
-
-      - name: Add msys paths
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: make cuda runner
-        run: |
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make cuda_v$(($env:CUDA_PATH | split-path -leaf) -replace 'v(\d+).*', '$1')
+          cmake --preset ${{ matrix.preset }}
+          cmake --build --preset ${{ matrix.preset }} --parallel
 
-  runners-cpu:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
+  test:
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64, arm64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
+        os: [ubuntu-latest, macos-latest, windows-latest]
     runs-on: ${{ matrix.os }}
     env:
-      GOARCH: ${{ matrix.arch }}
-      ARCH: ${{ matrix.arch }}
       CGO_ENABLED: '1'
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-go@v5
         with:
           go-version-file: go.mod
-          cache: true
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - name: 'Build Windows Go Runners'
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          $gopath=(get-command go).source | split-path -parent
-          $gccpath=(get-command gcc).source | split-path -parent
-          import-module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -vsinstallpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -skipautomaticlocation -DevCmdArguments '-arch=x64 -no_logo'
-          $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
-          $env:PATH="$gopath;$gccpath;$env:PATH"
-          echo $env:PATH
-          if (!(gcc --version | select-string -quiet clang)) { throw "wrong gcc compiler detected - must be clang" }
-          make -j 4
-      - name: 'Build Unix Go Runners'
-        if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        run: make -j 4
-      - run: go build .
-
-  lint:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64, arm64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-          - os: macos-latest
-            arch: amd64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: false
-      - run: |
-          case ${{ matrix.arch }} in
-            amd64) echo ARCH=x86_64 ;;
-            arm64) echo ARCH=arm64 ;;
-          esac >>$GITHUB_ENV
-        shell: bash
       - uses: golangci/golangci-lint-action@v6
         with:
           args: --timeout 10m0s -v
-  test:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest, windows-2019]
-        arch: [amd64]
-        exclude:
-          - os: ubuntu-latest
-            arch: arm64
-          - os: windows-2019
-            arch: arm64
-    runs-on: ${{ matrix.os }}
-    env:
-      GOARCH: ${{ matrix.arch }}
-      CGO_ENABLED: '1'
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Add msys paths
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          echo "c:\msys64\usr\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-          echo "C:\msys64\clang64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
-      - name: Install msys2 tools
-        if: ${{ startsWith(matrix.os, 'windows-') }}
-        run: |
-          Start-Process "c:\msys64\usr\bin\pacman.exe" -ArgumentList @("-S", "--noconfirm", "mingw-w64-clang-x86_64-gcc-compat", "mingw-w64-clang-x86_64-clang") -NoNewWindow -Wait
-      - uses: actions/setup-go@v5
-        with:
-          go-version-file: go.mod
-          cache: true
-      - run: |
-          case ${{ matrix.arch }} in
-            amd64) echo ARCH=amd64 ;;
-            arm64) echo ARCH=arm64 ;;
-          esac >>$GITHUB_ENV
-        shell: bash
       - run: go test ./...
 
   patches:
-    needs: [changes]
-    if: ${{ needs.changes.outputs.RUNNERS == 'True' }}
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-        with:
-          submodules: recursive
-      - name: Verify patches carry all the changes
+      - name: Verify patches apply cleanly and do not change files
         run: |
-          make apply-patches sync && git diff --compact-summary --exit-code llama
+          make -f Makefile2 clean checkout sync
+          git diff --compact-summary --exit-code
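
The `changed()` helper now watches only the vendored trees, so most PRs skip the GPU builds entirely. The same check can be run locally (a sketch; `BASE` and `HEAD` are placeholder refs for the comparison range):

```shell
# Prints True when files under the watched globs differ between BASE and HEAD,
# mirroring the changed() helper in the workflow.
BASE=origin/main HEAD=HEAD
git diff-tree -r --no-commit-id --name-only "$(git merge-base $BASE $HEAD)" "$HEAD" \
  | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in 'llama/llama.cpp/** ml/backend/ggml/ggml/**'.split(' ')))"
```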

+ 54 - 0
CMakeLists.txt

@@ -0,0 +1,54 @@
+cmake_minimum_required(VERSION 3.21)
+
+project(Ollama C CXX)
+
+include(CheckLanguage)
+
+find_package(Threads REQUIRED)
+
+set(CMAKE_BUILD_TYPE Release)
+set(BUILD_SHARED_LIBS ON)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+set(GGML_BUILD ON)
+set(GGML_SHARED ON)
+set(GGML_CCACHE ON)
+set(GGML_BACKEND_DL ON)
+set(GGML_BACKEND_SHARED ON)
+set(GGML_SCHED_MAX_COPIES 4)
+
+set(GGML_LLAMAFILE ON)
+set(GGML_CPU_ALL_VARIANTS ON)
+set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
+set(GGML_CUDA_GRAPHS ON)
+
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/include)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cpu/amx)
+
+set(GGML_CPU ON)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src)
+set_property(TARGET ggml PROPERTY EXCLUDE_FROM_ALL TRUE)
+
+check_language(CUDA)
+if(CMAKE_CUDA_COMPILER)
+    if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24" AND NOT CMAKE_CUDA_ARCHITECTURES)
+        set(CMAKE_CUDA_ARCHITECTURES "native")
+    endif()
+
+    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-cuda)
+endif()
+
+check_language(HIP)
+if(CMAKE_HIP_COMPILER)
+    set(HIP_PLATFORM "amd")
+
+    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-hip)
+endif()
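
Because the GPU subdirectories are added only when `check_language()` finds a working CUDA or HIP compiler, a plain configure still succeeds on a CPU-only machine (a sketch, assuming CMake >= 3.21 on PATH):

```shell
# Configure and build from the repository root; the CUDA/HIP backends are
# compiled only if their toolchains are detected at configure time.
cmake -B build
cmake --build build --parallel
```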

+ 109 - 0
CMakePresets.json

@@ -0,0 +1,109 @@
+{
+  "version": 3,
+  "configurePresets": [
+    {
+      "name": "Default",
+      "binaryDir": "${sourceDir}/build",
+      "cacheVariables": {
+        "CMAKE_BUILD_TYPE": "Release"
+      }
+    },
+    {
+      "name": "CPU",
+      "inherits": [ "Default" ]
+    },
+    {
+      "name": "CUDA",
+      "inherits": [ "Default" ]
+    },
+    {
+      "name": "CUDA 11",
+      "inherits": [ "CUDA" ],
+      "cacheVariables": {
+        "CMAKE_CUDA_ARCHITECTURES": "50;52;53;60;61;62;70;72;75;80;86"
+      }
+    },
+    {
+      "name": "CUDA 12",
+      "inherits": [ "CUDA" ],
+      "cacheVariables": {
+        "CMAKE_CUDA_ARCHITECTURES": "60;61;62;70;72;75;80;86;87;89;90;90a"
+      }
+    },
+    {
+      "name": "JetPack 5",
+      "inherits": [ "CUDA" ],
+      "cacheVariables": {
+        "CMAKE_CUDA_ARCHITECTURES": "72;87"
+      }
+    },
+    {
+      "name": "JetPack 6",
+      "inherits": [ "CUDA" ],
+      "cacheVariables": {
+        "CMAKE_CUDA_ARCHITECTURES": "87"
+      }
+    },
+    {
+      "name": "ROCm",
+      "inherits": [ "Default" ],
+      "cacheVariables": {
+        "CMAKE_HIP_PLATFORM": "amd"
+      }
+    },
+    {
+      "name": "ROCm 6",
+      "inherits": [ "ROCm" ],
+      "cacheVariables": {
+        "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+      }
+    }
+  ],
+  "buildPresets": [
+    {
+      "name": "Default",
+      "configurePreset": "Default",
+      "configuration": "Release"
+    },
+    {
+      "name": "CPU",
+      "configurePreset": "Default",
+      "targets": [ "ggml-cpu" ]
+    },
+    {
+      "name": "CUDA",
+      "configurePreset": "CUDA",
+      "targets": [ "ggml-cuda" ]
+    },
+    {
+      "name": "CUDA 11",
+      "inherits": [ "CUDA" ],
+      "configurePreset": "CUDA 11"
+    },
+    {
+      "name": "CUDA 12",
+      "inherits": [ "CUDA" ],
+      "configurePreset": "CUDA 12"
+    },
+    {
+      "name": "JetPack 5",
+      "inherits": [ "CUDA" ],
+      "configurePreset": "JetPack 5"
+    },
+    {
+      "name": "JetPack 6",
+      "inherits": [ "CUDA" ],
+      "configurePreset": "JetPack 6"
+    },
+    {
+      "name": "ROCm",
+      "configurePreset": "ROCm",
+      "targets": [ "ggml-hip" ]
+    },
+    {
+      "name": "ROCm 6",
+      "inherits": [ "ROCm" ],
+      "configurePreset": "ROCm 6"
+    }
+  ]
+}
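
These presets are what both the Dockerfile and CI invoke; the same two-step pattern works locally when the matching toolkit is installed:

```shell
# Configure, then build, one flavor; swap in 'CPU', 'CUDA 12', 'ROCm 6', etc.
cmake --preset 'CUDA 11'
cmake --build --parallel --preset 'CUDA 11'
```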

+ 150 - 190
Dockerfile

@@ -1,201 +1,161 @@
-ARG GOLANG_VERSION=1.22.8
-ARG CUDA_VERSION_11=11.3.1
-ARG CUDA_VERSION_12=12.4.0
-ARG ROCM_VERSION=6.1.2
-ARG JETPACK_6=r36.2.0
-ARG JETPACK_5=r35.4.1
-
-### To create a local image for building linux binaries on mac or windows with efficient incremental builds
-#
-# docker build --platform linux/amd64 -t builder-amd64 -f Dockerfile --target unified-builder-amd64 .
-# docker run --platform linux/amd64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-amd64
-#
-### Then incremental builds will be much faster in this container
-#
-# make -j 10 dist
-#
-FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCM_VERSION}-complete AS unified-builder-amd64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-ENV PATH /opt/rh/devtoolset-10/root/usr/bin:/usr/local/cuda/bin:$PATH
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-# TODO intel oneapi goes here...
-ENV GOARCH amd64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
-
-### To create a local image for building linux binaries on mac or linux/arm64 with efficient incremental builds
-# Note: this does not contain jetson variants
-#
-# docker build --platform linux/arm64 -t builder-arm64 -f Dockerfile --target unified-builder-arm64 .
-# docker run --platform linux/arm64 --rm -it -v $(pwd):/go/src/github.com/ollama/ollama/ builder-arm64
-#
-FROM --platform=linux/arm64 rockylinux:8 AS unified-builder-arm64
-ARG GOLANG_VERSION
-ARG CUDA_VERSION_11
-ARG CUDA_VERSION_12
-COPY ./scripts/rh_linux_deps.sh /
-RUN GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
-RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo && \
-    dnf config-manager --set-enabled appstream && \
-    dnf clean all && \
-    dnf install -y \
-    zsh \
-    cuda-toolkit-$(echo ${CUDA_VERSION_11} | cut -f1-2 -d. | sed -e "s/\./-/g") \
-    cuda-toolkit-$(echo ${CUDA_VERSION_12} | cut -f1-2 -d. | sed -e "s/\./-/g")
-ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH:/usr/local/cuda/bin
-ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64
-ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:/opt/amdgpu/lib64
-ENV GOARCH arm64
-ENV CGO_ENABLED 1
-WORKDIR /go/src/github.com/ollama/ollama/
-ENTRYPOINT [ "zsh" ]
-
-FROM --platform=linux/amd64 unified-builder-amd64 AS build-amd64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_SKIP_ROCM_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
-ARG CUSTOM_CPU_FLAGS
-RUN --mount=type=cache,target=/root/.ccache \
-    if grep "^flags" /proc/cpuinfo|grep avx>/dev/null; then \
-        make -j $(nproc) dist ; \
-    else \
-        make -j 5 dist ; \
-    fi
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN if [ -z ${OLLAMA_SKIP_ROCM_GENERATE} ] ; then \
-    cd dist/linux-$GOARCH-rocm && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-rocm.tgz ;\
-    fi
-
-# Jetsons need to be built in discrete stages
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_5} AS runners-jetpack5-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
-RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v11 \
-        CUDA_ARCHITECTURES="72;87" \
-        GPU_RUNNER_VARIANT=_jetpack5 \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ollama/cuda_jetpack5
-
-FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK_6} AS runners-jetpack6-arm64
-ARG GOLANG_VERSION
-RUN apt-get update && apt-get install -y git curl ccache && \
-    curl -s -L https://dl.google.com/go/go${GOLANG_VERSION}.linux-arm64.tar.gz | tar xz -C /usr/local && \
-    ln -s /usr/local/go/bin/go /usr/local/bin/go && \
-    ln -s /usr/local/go/bin/gofmt /usr/local/bin/gofmt && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-WORKDIR /go/src/github.com/ollama/ollama/
-COPY . .
-ARG CGO_CFLAGS
-ENV GOARCH arm64
-ARG VERSION
+# vim: filetype=dockerfile
+
+ARG FLAVOR=${TARGETARCH}
+
+ARG ROCMVERSION=6.1.2
+ARG JETPACK5VERSION=r35.4.1
+ARG JETPACK6VERSION=r36.2.0
+ARG CMAKEVERSION=3.31.2
+
+FROM --platform=linux/amd64 rocm/dev-centos-7:${ROCMVERSION}-complete AS base-amd64
+RUN sed -i -e 's/mirror.centos.org/vault.centos.org/g' -e 's/^#.*baseurl=http/baseurl=http/g' -e 's/^mirrorlist=http/#mirrorlist=http/g' /etc/yum.repos.d/*.repo \
+    && yum install -y yum-utils devtoolset-10-gcc devtoolset-10-gcc-c++ \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \
+    && curl -s -L https://github.com/ccache/ccache/releases/download/v4.10.2/ccache-4.10.2-linux-x86_64.tar.xz | tar -Jx -C /usr/local/bin --strip-components 1
+ENV PATH=/opt/rh/devtoolset-10/root/usr/bin:/opt/rh/devtoolset-11/root/usr/bin:$PATH
+
+FROM --platform=linux/arm64 rockylinux:8 AS base-arm64
+# install epel-release for ccache
+RUN yum install -y yum-utils epel-release \
+    && yum install -y clang ccache \
+    && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
+ENV CC=clang CXX=clang++
+
+FROM base-${TARGETARCH} AS base
+ARG CMAKEVERSION
+RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+ENV LDFLAGS=-s
+
+FROM base AS cpu
+# amd64 uses gcc which requires devtoolset-11 for AVX extensions while arm64 uses clang
+RUN if [ "$(uname -m)" = "x86_64" ]; then yum install -y devtoolset-11-gcc devtoolset-11-gcc-c++; fi
+ENV PATH=/opt/rh/devtoolset-11/root/usr/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist_cuda_v12 \
-        CUDA_ARCHITECTURES="87" \
-        GPU_RUNNER_VARIANT=_jetpack6 \
-        DIST_LIB_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama \
-        DIST_GPU_RUNNER_DEPS_DIR=/go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ollama/cuda_jetpack6
+    cmake --preset 'CPU' && cmake --build --parallel --preset 'CPU'
 
-FROM --platform=linux/arm64 unified-builder-arm64 AS build-arm64
-COPY . .
-ARG OLLAMA_SKIP_CUDA_GENERATE
-ARG OLLAMA_FAST_BUILD
-ARG VERSION
+FROM base AS cuda-11
+ARG CUDA11VERSION=11.3
+RUN yum install -y cuda-toolkit-${CUDA11VERSION//./-}
+ENV PATH=/usr/local/cuda-11/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
-    make -j 5 dist
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/ dist/
-RUN cd dist/linux-$GOARCH && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH.tgz
-RUN cd dist/linux-$GOARCH-jetpack5 && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack5.tgz
-RUN cd dist/linux-$GOARCH-jetpack6 && \
-    tar -cf - . | pigz --best > ../ollama-linux-$GOARCH-jetpack6.tgz
-
-FROM --platform=linux/amd64 scratch AS dist-amd64
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM --platform=linux/arm64 scratch AS dist-arm64
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/ollama-linux-*.tgz /
-FROM dist-$TARGETARCH AS dist
-
-
-# For amd64 container images, filter out cuda/rocm to minimize size
-FROM build-amd64 AS runners-cuda-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_hipblas.so \
-    ./dist/linux-amd64/lib/ollama/runners/rocm*
-
-FROM build-amd64 AS runners-rocm-amd64
-RUN rm -rf \
-    ./dist/linux-amd64/lib/ollama/libggml_cuda*.so \
-    ./dist/linux-amd64/lib/ollama/libcu*.so* \
-    ./dist/linux-amd64/lib/ollama/runners/cuda*
-
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-amd64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-cuda-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
-
-FROM --platform=linux/arm64 ubuntu:22.04 AS runtime-arm64
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/bin/ /bin/
-COPY --from=build-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64/lib/ /lib/
-COPY --from=runners-jetpack5-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack5/lib/ /lib/
-COPY --from=runners-jetpack6-arm64 /go/src/github.com/ollama/ollama/dist/linux-arm64-jetpack6/lib/ /lib/
-
-
-# ROCm libraries larger so we keep it distinct from the CPU/CUDA image
-FROM --platform=linux/amd64 ubuntu:22.04 AS runtime-rocm
-# Frontload the rocm libraries which are large, and rarely change to increase chance of a common layer
-# across releases
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64-rocm/lib/ /lib/
-RUN apt-get update && \
-    apt-get install -y ca-certificates && \
-    apt-get clean && rm -rf /var/lib/apt/lists/*
-COPY --from=build-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/bin/ /bin/
-COPY --from=runners-rocm-amd64 /go/src/github.com/ollama/ollama/dist/linux-amd64/lib/ /lib/
+    cmake --preset 'CUDA 11' && cmake --build --parallel --preset 'CUDA 11'
 
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+FROM base AS cuda-12
+ARG CUDA12VERSION=12.4
+RUN yum install -y cuda-toolkit-${CUDA12VERSION//./-}
+ENV PATH=/usr/local/cuda-12/bin:$PATH
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'CUDA 12' && cmake --build --parallel --preset 'CUDA 12'
 
-ENTRYPOINT ["/bin/ollama"]
-CMD ["serve"]
+FROM base AS rocm-6
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'ROCm 6' && cmake --build --parallel --preset 'ROCm 6'
+
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
+ARG CMAKEVERSION
+RUN apt-get update && apt-get install -y curl ccache \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'JetPack 5' && cmake --build --parallel --preset 'JetPack 5'
+
+FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
+ARG CMAKEVERSION
+RUN apt-get update && apt-get install -y curl ccache \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'JetPack 6' && cmake --build --parallel --preset 'JetPack 6'
 
-FROM runtime-$TARGETARCH
-EXPOSE 11434
-ENV OLLAMA_HOST 0.0.0.0
+FROM base AS build
+ARG GOVERSION=1.23.4
+RUN curl -fsSL https://golang.org/dl/go${GOVERSION}.linux-$(case $(uname -m) in x86_64) echo amd64 ;; aarch64) echo arm64 ;; esac).tar.gz | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+WORKDIR /go/src/github.com/ollama/ollama
+COPY . .
+ARG GOFLAGS="'-ldflags=-w -s'"
+ENV CGO_ENABLED=1
+RUN --mount=type=cache,target=/root/.cache/go-build \
+    go build -trimpath -buildmode=pie -o /bin/ollama .
+
+FROM --platform=linux/amd64 scratch AS amd64
+COPY --from=cuda-11 --chmod=644 \
+    build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_v11/
+COPY --from=cuda-12 --chmod=644 \
+    build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_v12/
+
+FROM --platform=linux/arm64 scratch AS arm64
+COPY --from=cuda-11 --chmod=644 \
+    build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_v11/
+COPY --from=cuda-12 --chmod=644 \
+    build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_v12/
+COPY --from=jetpack-5 --chmod=644 \
+    build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.11 \
+    /usr/local/cuda/lib64/libcublasLt.so.11 \
+    /usr/local/cuda/lib64/libcudart.so.11.0 \
+    /lib/ollama/cuda_jetpack5/
+COPY --from=jetpack-6 --chmod=644 \
+    build/lib/libggml-cuda.so \
+    /usr/local/cuda/lib64/libcublas.so.12 \
+    /usr/local/cuda/lib64/libcublasLt.so.12 \
+    /usr/local/cuda/lib64/libcudart.so.12 \
+    /lib/ollama/cuda_jetpack6/
+
+FROM --platform=linux/amd64 scratch AS rocm
+COPY --from=rocm-6 --chmod=644 \
+    build/lib/libggml-hip.so \
+    /opt/rocm/lib/libamdhip64.so.6 \
+    /opt/rocm/lib/libhipblas.so.2 \
+    /opt/rocm/lib/librocblas.so.4 \
+    /opt/rocm/lib/libamd_comgr.so.2 \
+    /opt/rocm/lib/libhsa-runtime64.so.1 \
+    /opt/rocm/lib/librocprofiler-register.so.0 \
+    /opt/amdgpu/lib64/libdrm_amdgpu.so.1 \
+    /opt/amdgpu/lib64/libdrm.so.2 \
+    /usr/lib64/libnuma.so.1 \
+    /lib/ollama/rocm/
+COPY --from=rocm-6 /opt/rocm/lib/rocblas/ /lib/ollama/rocm/rocblas/
+
+FROM ${FLAVOR} AS archive
+COPY --from=cpu --chmod=644 \
+    build/lib/libggml-base.so \
+    build/lib/libggml-cpu-*.so \
+    /lib/ollama/
+COPY --from=build /bin/ollama /bin/ollama
+
+FROM ubuntu:20.04
+RUN apt-get update \
+    && apt-get install -y ca-certificates \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=archive /bin/ /usr/bin/
 ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
+COPY --from=archive /lib/ollama/ /usr/lib/ollama/
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_VISIBLE_DEVICES=all
-
+ENV OLLAMA_HOST=0.0.0.0:11434
+EXPOSE 11434
 ENTRYPOINT ["/bin/ollama"]
 CMD ["serve"]
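
The unnamed final stage is the runtime image, while the intermediate stages double as build outputs (a sketch, assuming Docker with buildx):

```shell
# Runtime image for the host architecture:
docker build -t ollama:local .
# Export only the packaged binary and backend libraries:
docker buildx build --target archive --output type=local,dest=dist .
# ROCm-flavored image (linux/amd64 only):
docker build --build-arg FLAVOR=rocm -t ollama:rocm .
```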

+ 0 - 103
Makefile

@@ -1,103 +0,0 @@
-# top level makefile for Ollama
-include make/common-defs.make
-
-
-# Determine which if any GPU runners we should build
-include make/cuda-v11-defs.make
-include make/cuda-v12-defs.make
-include make/rocm-defs.make
-
-ifeq ($(CUSTOM_CPU_FLAGS),)
-ifeq ($(ARCH),amd64)
-	RUNNER_TARGETS=cpu
-endif
-# Without CUSTOM_CPU_FLAGS we default to build both v11 and v12 if present
-ifeq ($(OLLAMA_SKIP_CUDA_GENERATE),)
-ifneq ($(CUDA_11_COMPILER),)
-	RUNNER_TARGETS += cuda_v11
-endif
-ifneq ($(CUDA_12_COMPILER),)
-	RUNNER_TARGETS += cuda_v12
-endif
-endif
-else # CUSTOM_CPU_FLAGS is set, we'll build only the latest cuda version detected
-ifneq ($(CUDA_12_COMPILER),)
-	RUNNER_TARGETS += cuda_v12
-else ifneq ($(CUDA_11_COMPILER),)
-	RUNNER_TARGETS += cuda_v11
-endif
-endif
-
-ifeq ($(OLLAMA_SKIP_ROCM_GENERATE),)
-ifneq ($(HIP_COMPILER),)
-	RUNNER_TARGETS += rocm
-endif
-endif
-
-
-all: runners exe
-
-dist: $(addprefix dist_, $(RUNNER_TARGETS)) dist_exe
-
-dist_%:
-	@$(MAKE) --no-print-directory -f make/Makefile.$* dist
-
-runners: $(RUNNER_TARGETS)
-
-$(RUNNER_TARGETS):
-	@$(MAKE) --no-print-directory -f make/Makefile.$@
-
-exe dist_exe:
-	@$(MAKE) --no-print-directory -f make/Makefile.ollama $@
-
-help-sync apply-patches create-patches sync sync-clean:
-	@$(MAKE) --no-print-directory -f make/Makefile.sync $@
-
-test integration lint:
-	@$(MAKE) --no-print-directory -f make/Makefile.test $@
-
-clean:
-	rm -rf $(BUILD_DIR) $(DIST_LIB_DIR) $(OLLAMA_EXE) $(DIST_OLLAMA_EXE)
-	go clean -cache
-
-help:
-	@echo "The following make targets will help you build Ollama"
-	@echo ""
-	@echo "	make all   		# (default target) Build Ollama llm subprocess runners, and the primary ollama executable"
-	@echo "	make runners		# Build Ollama llm subprocess runners; after you may use 'go build .' to build the primary ollama exectuable"
-	@echo "	make <runner>		# Build specific runners. Enabled: '$(RUNNER_TARGETS)'"
-	@echo "	make dist		# Build the runners and primary ollama executable for distribution"
-	@echo "	make help-sync 		# Help information on vendor update targets"
-	@echo "	make help-runners 	# Help information on runner targets"
-	@echo ""
-	@echo "The following make targets will help you test Ollama"
-	@echo ""
-	@echo "	make test   		# Run unit tests"
-	@echo "	make integration	# Run integration tests.  You must 'make all' first"
-	@echo "	make lint   		# Run lint and style tests"
-	@echo ""
-	@echo "For more information see 'docs/development.md'"
-	@echo ""
-
-
-help-runners:
-	@echo "The following runners will be built based on discovered GPU libraries: '$(RUNNER_TARGETS)'"
-	@echo ""
-	@echo "GPU Runner CPU Flags: '$(GPU_RUNNER_CPU_FLAGS)'  (Override with CUSTOM_CPU_FLAGS)"
-	@echo ""
-	@echo "# CUDA_PATH sets the location where CUDA toolkits are present"
-	@echo "CUDA_PATH=$(CUDA_PATH)"
-	@echo "	CUDA_11_PATH=$(CUDA_11_PATH)"
-	@echo "	CUDA_11_COMPILER=$(CUDA_11_COMPILER)"
-	@echo "	CUDA_12_PATH=$(CUDA_12_PATH)"
-	@echo "	CUDA_12_COMPILER=$(CUDA_12_COMPILER)"
-	@echo ""
-	@echo "# HIP_PATH sets the location where the ROCm toolkit is present"
-	@echo "HIP_PATH=$(HIP_PATH)"
-	@echo "	HIP_COMPILER=$(HIP_COMPILER)"
-
-.PHONY: all exe dist help help-sync help-runners test integration lint runners clean $(RUNNER_TARGETS)
-
-# Handy debugging for make variables
-print-%:
-	@echo '$*=$($*)'

+ 46 - 0
Makefile2

@@ -0,0 +1,46 @@
+UPSTREAM=https://github.com/ggerganov/llama.cpp.git
+WORKDIR=llama/vendor
+FETCH_HEAD=46e3556e01b824e52395fb050b29804b6cff2a7c
+
+all: sync
+
+.PHONY: sync
+sync: llama/llama.cpp ml/backend/ggml/ggml
+
+.PHONY: llama/llama.cpp
+llama/llama.cpp: llama/vendor/ apply_patches
+	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
+
+.PHONY: ml/backend/ggml/ggml apply_patches
+ml/backend/ggml/ggml: llama/vendor/ggml/ apply_patches
+	rsync -arvzc -f "merge $@/.rsync-filter" $< $@
+
+PATCHES=$(wildcard llama/patches/*.patch)
+
+.PHONY: apply_patches
+.NOTPARALLEL:
+apply_patches: $(addsuffix ed, $(PATCHES))
+
+%.patched: %.patch
+	@if git -c user.name=nobody -c 'user.email=<>' -C $(WORKDIR) am -3 $(realpath $<); then touch $@; else git -C $(WORKDIR) am --abort; exit 1; fi
+
+.PHONY: checkout
+checkout: $(WORKDIR)
+	git -C $(WORKDIR) fetch
+	git -C $(WORKDIR) checkout -f $(FETCH_HEAD)
+
+$(WORKDIR):
+	git clone $(UPSTREAM) $(WORKDIR)
+
+.PHONY: format_patches
+format_patches: llama/patches
+	git -C $(WORKDIR) format-patch \
+		--no-signature \
+		--no-numbered \
+		--zero-commit \
+		-o $(realpath $<) \
+		$(FETCH_HEAD)
+
+.PHONY: clean
+clean: checkout
+	$(RM) $(addsuffix ed, $(PATCHES))

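The new `Makefile2` drives the vendor-sync workflow: `checkout` clones and pins llama.cpp at `FETCH_HEAD`, `sync` applies the local `llama/patches/*.patch` series with `git am -3` (aborting the `am` session and failing on conflict) and then rsyncs the vendored trees into `llama/llama.cpp` and `ml/backend/ggml/ggml`, and `format_patches` regenerates the patch series from commits made in the vendor checkout. A minimal usage sketch, under the assumption that this file is invoked explicitly with `-f Makefile2` (the invocation style is not shown in the diff):

```shell
# pin the upstream checkout at the recorded commit
make -f Makefile2 checkout

# apply llama/patches/*.patch, then rsync the vendored sources into place
make -f Makefile2 sync

# after committing fixes inside llama/vendor, regenerate the patch series
make -f Makefile2 format_patches
```
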
+ 2 - 1
go.mod

@@ -17,12 +17,14 @@ require (
 require (
 	github.com/agnivade/levenshtein v1.1.1
 	github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
+	github.com/dlclark/regexp2 v1.11.4
 	github.com/emirpasic/gods/v2 v2.0.0-alpha
 	github.com/google/go-cmp v0.6.0
 	github.com/mattn/go-runewidth v0.0.14
 	github.com/nlpodyssey/gopickle v0.3.0
 	github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
 	golang.org/x/image v0.22.0
+	gonum.org/v1/gonum v0.15.0
 )
 
 require (
@@ -42,7 +44,6 @@ require (
 	github.com/xtgo/set v1.0.0 // indirect
 	go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
 	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
-	gonum.org/v1/gonum v0.15.0 // indirect
 	gorgonia.org/vecf32 v0.9.0 // indirect
 	gorgonia.org/vecf64 v0.9.0 // indirect
 )

+ 2 - 0
go.sum

@@ -42,6 +42,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
 github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
+github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
+github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/emirpasic/gods/v2 v2.0.0-alpha h1:dwFlh8pBg1VMOXWGipNMRt8v96dKAIvBehtCt6OtunU=
 github.com/emirpasic/gods/v2 v2.0.0-alpha/go.mod h1:W0y4M2dtBB9U5z3YlghmpuUhiaZT2h6yoeE+C1sCp6A=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=

+ 1 - 2
llama/README.md

@@ -37,8 +37,7 @@ go build -tags avx .
 ```shell
 # go doesn't recognize `-mfma` as a valid compiler flag
 # see https://github.com/golang/go/issues/17895
-go env -w "CGO_CFLAGS_ALLOW=-mfma|-mf16c"
-go env -w "CGO_CXXFLAGS_ALLOW=-mfma|-mf16c"
+go env -w "CGO_CPPFLAGS_ALLOW=-mfma|-mf16c"
 go build -tags=avx,avx2 .
 ```
 

+ 0 - 34
llama/amx.h

@@ -1,34 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ggml-backend.h"
-#include "ggml-cpu-impl.h"
-
-// GGML internal header
-
-#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
-ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void);
-#endif

+ 0 - 51
llama/ggml-blas.h

@@ -1,51 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include "ggml.h"
-#include "ggml-backend.h"
-
-
-#ifdef  __cplusplus
-extern "C" {
-#endif
-
-// backend API
-GGML_BACKEND_API ggml_backend_t ggml_backend_blas_init(void);
-
-GGML_BACKEND_API bool ggml_backend_is_blas(ggml_backend_t backend);
-
-// number of threads used for conversion to float
-// for openblas and blis, this will also set the number of threads used for blas operations
-GGML_BACKEND_API void ggml_backend_blas_set_n_threads(ggml_backend_t backend_blas, int n_threads);
-
-GGML_BACKEND_API ggml_backend_reg_t ggml_backend_blas_reg(void);
-
-
-#ifdef  __cplusplus
-}
-#endif

+ 0 - 34
llama/ggml-cpu-aarch64.h

@@ -1,34 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include "ggml-cpu-traits.h"
-#include "ggml.h"
-
-// GGML internal header
-
-ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);

+ 0 - 64
llama/ggml-cpu-traits.h

@@ -1,64 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-#include "ggml-backend-impl.h"
-#include "ggml-cpu-impl.h"
-#include "ggml.h"
-
-#ifdef __cplusplus
-#    include <vector>
-extern "C" {
-#endif
-
-// return true if op part of extra "accelerator"
-bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
-bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
-
-#ifdef __cplusplus
-}
-
-namespace ggml::cpu {
-// register in tensor->extra
-class tensor_traits {
-  public:
-    virtual ~tensor_traits();
-    virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size)        = 0;
-    virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0;
-};
-
-class extra_buffer_type {
-  public:
-    virtual ~extra_buffer_type();
-    virtual bool            supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
-    virtual tensor_traits * get_tensor_traits(const struct ggml_tensor * op)                   = 0;
-};
-}  // namespace ggml::cpu
-
-// implemented in ggml-cpu.cpp.
-std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffers_type();
-
-#endif

+ 0 - 31
llama/ggml-cuda/acc.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_ACC_BLOCK_SIZE 256
-
-void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 60
llama/ggml-cuda/arange.cu

@@ -1,60 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arange.cuh"
-
-static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) {
-    // blockIDx.x: idx of ne0 / BLOCK_SIZE
-    int nidx = threadIdx.x + blockIdx.x * blockDim.x;
-    if (nidx >= ne0) {
-        return;
-    }
-    dst[nidx] = start + step * nidx;
-}
-
-static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) {
-    int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE;
-    arange_f32<<<num_blocks, CUDA_ARANGE_BLOCK_SIZE, 0, stream>>>(dst, ne0, start,  step);
-}
-
-void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-
-    float start;
-    float stop;
-    float step;
-    memcpy(&start, (float *)dst->op_params + 0, sizeof(float));
-    memcpy(&stop,  (float *)dst->op_params + 1, sizeof(float));
-    memcpy(&step,  (float *)dst->op_params + 2, sizeof(float));
-
-    int64_t steps = (int64_t)ceil((stop - start) / step);
-    GGML_ASSERT(ggml_nelements(dst) == steps);
-
-    arange_f32_cuda(dst_d, dst->ne[0], start, step, stream);
-}

+ 0 - 31
llama/ggml-cuda/arange.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_ARANGE_BLOCK_SIZE 256
-
-void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 29
llama/ggml-cuda/argmax.cuh

@@ -1,29 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 29
llama/ggml-cuda/argsort.cuh

@@ -1,29 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 35
llama/ggml-cuda/binbcast.cuh

@@ -1,35 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-
-void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 60
llama/ggml-cuda/clamp.cu

@@ -1,60 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "clamp.cuh"
-
-static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) {
-    const int i = blockDim.x*blockIdx.x + threadIdx.x;
-
-    if (i >= k) {
-        return;
-    }
-
-    dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
-}
-
-static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
-    const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE;
-    clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
-}
-
-
-void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    float min;
-    float max;
-    memcpy(&min, dst->op_params, sizeof(float));
-    memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
-
-    clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
-}

+ 0 - 31
llama/ggml-cuda/clamp.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_CLAMP_BLOCK_SIZE 256
-
-void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/concat.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_CONCAT_BLOCK_SIZE 256
-
-void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/conv-transpose-1d.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256
-
-void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 39
llama/ggml-cuda/convert.cuh

@@ -1,39 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
-
-template<typename T>
-using to_t_cuda_t = void (*)(const void * __restrict__ x, T * __restrict__ y, int64_t k, cudaStream_t stream);
-
-typedef to_t_cuda_t<float> to_fp32_cuda_t;
-typedef to_t_cuda_t<half> to_fp16_cuda_t;
-
-to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
-
-to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);

+ 0 - 31
llama/ggml-cuda/count-equal.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_COUNT_EQUAL_CHUNK_SIZE 128
-
-void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 35
llama/ggml-cuda/cpy.cuh

@@ -1,35 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_CPY_BLOCK_SIZE 64
-
-void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1);
-
-void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-
-void* ggml_cuda_cpy_fn(const ggml_tensor * src0, ggml_tensor * src1);

+ 0 - 33
llama/ggml-cuda/cross-entropy-loss.cuh

@@ -1,33 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_CROSS_ENTROPY_LOSS_BLOCK_SIZE 256
-
-void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-
-void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/diagmask.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
-
-void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 29
llama/ggml-cuda/fattn-tile-f16.cuh

@@ -1,29 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 29
llama/ggml-cuda/fattn-tile-f32.cuh

@@ -1,29 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 29
llama/ggml-cuda/fattn.cuh

@@ -1,29 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/getrows.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_GET_ROWS_BLOCK_SIZE 256
-
-void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/im2col.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_IM2COL_BLOCK_SIZE 256
-
-void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 38
llama/ggml-cuda/mmv.cuh

@@ -1,38 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available
-#define MMV_MAX_ROWS 512
-
-void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-
-void ggml_cuda_op_mul_mat_vec(
-    ggml_backend_cuda_context & ctx,
-    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
-    const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
-    const int64_t src1_padded_row_size, cudaStream_t stream);

+ 0 - 35
llama/ggml-cuda/mmvq.cuh

@@ -1,35 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define MMVQ_MAX_BATCH_SIZE 8 // Max. batch size for which to use MMVQ kernels.
-
-void ggml_cuda_op_mul_mat_vec_q(
-    ggml_backend_cuda_context & ctx,
-    const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i,
-    const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols,
-    const int64_t src1_padded_row_size, cudaStream_t stream);

+ 0 - 33
llama/ggml-cuda/norm.cuh

@@ -1,33 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_op_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-
-void ggml_cuda_op_group_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-
-void ggml_cuda_op_rms_norm(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/opt-step-adamw.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256
-
-void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 29
llama/ggml-cuda/out-prod.cuh

@@ -1,29 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 32
llama/ggml-cuda/pad.cuh

@@ -1,32 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_PAD_BLOCK_SIZE 256
-
-void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
-void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/pool2d.cuh

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "common.cuh"
-
-#define CUDA_POOL2D_BLOCK_SIZE 256
-
-void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 50
llama/ggml-cuda/quantize.cuh

@@ -1,50 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#pragma once
-
-#include "common.cuh"
-#include "mmq.cuh"
-
-#include <cstdint>
-
-#define CUDA_QUANTIZE_BLOCK_SIZE     256
-#define CUDA_QUANTIZE_BLOCK_SIZE_MMQ 128
-
-static_assert(MATRIX_ROW_PADDING %    CUDA_QUANTIZE_BLOCK_SIZE      == 0, "Risk of out-of-bounds access.");
-static_assert(MATRIX_ROW_PADDING % (4*CUDA_QUANTIZE_BLOCK_SIZE_MMQ) == 0, "Risk of out-of-bounds access.");
-
-typedef void (*quantize_cuda_t)(
-    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
-    const ggml_type type_x, cudaStream_t stream);
-
-void quantize_row_q8_1_cuda(
-    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
-    const ggml_type type_x, cudaStream_t stream);
-
-void quantize_mmq_q8_1_cuda(
-    const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
-    const ggml_type type_x, cudaStream_t stream);

+ 0 - 31
llama/ggml-cuda/rope.cuh

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "common.cuh"
-
-#define CUDA_ROPE_BLOCK_SIZE 256
-
-void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 57
llama/ggml-cuda/scale.cu

@@ -1,57 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "scale.cuh"
-
-static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
-    const int i = blockDim.x*blockIdx.x + threadIdx.x;
-
-    if (i >= k) {
-        return;
-    }
-
-    dst[i] = scale * x[i];
-}
-
-static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) {
-    const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE;
-    scale_f32<<<num_blocks, CUDA_SCALE_BLOCK_SIZE, 0, stream>>>(x, dst, scale, k);
-}
-
-void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    float scale;
-    memcpy(&scale, dst->op_params, sizeof(float));
-
-    scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream);
-}
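
For illustration, the deleted launcher's pattern (round k up to whole blocks, mask off the excess threads inside the kernel) can be reproduced as a self-contained CUDA program; the names below are local stand-ins rather than ggml's, with 256 mirroring CUDA_SCALE_BLOCK_SIZE from the companion header that follows:

#include <cstdio>
#include <cuda_runtime.h>

#define SCALE_BLOCK_SIZE 256  // same value as the deleted CUDA_SCALE_BLOCK_SIZE

// Same body as the deleted kernel: one element per thread; threads past k
// (from rounding the grid up) return without touching memory.
static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) {
    const int i = blockDim.x*blockIdx.x + threadIdx.x;
    if (i >= k) {
        return;
    }
    dst[i] = scale * x[i];
}

int main() {
    const int k = 1000;  // deliberately not a multiple of the block size
    float * x; float * dst;
    cudaMallocManaged(&x,   k*sizeof(float));
    cudaMallocManaged(&dst, k*sizeof(float));
    for (int i = 0; i < k; ++i) { x[i] = 1.0f; }

    const int num_blocks = (k + SCALE_BLOCK_SIZE - 1) / SCALE_BLOCK_SIZE;  // rounds up to 4
    scale_f32<<<num_blocks, SCALE_BLOCK_SIZE>>>(x, dst, 2.0f, k);
    cudaDeviceSynchronize();

    printf("dst[0] = %.1f, dst[%d] = %.1f\n", dst[0], k - 1, dst[k - 1]);  // both 2.0
    cudaFree(x);
    cudaFree(dst);
    return 0;
}

With k = 1000 the launch rounds up to 4 blocks of 256; the 24 surplus threads hit the early return instead of writing out of range.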

+ 0 - 31
llama/ggml-cuda/scale.cuh

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "common.cuh"
-
-#define CUDA_SCALE_BLOCK_SIZE 256
-
-void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/softmax.cuh

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "common.cuh"
-
-#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
-
-void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/sum.cuh

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "common.cuh"
-
-void sum_f32_cuda(ggml_cuda_pool & pool, const float * x, float * dst, const int64_t ne, cudaStream_t stream);
-
-void ggml_cuda_op_sum(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 65
llama/ggml-cuda/sumrows.cu

@@ -1,65 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "sumrows.cuh"
-
-static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
-    const int row = blockIdx.x;
-    const int col = threadIdx.x;
-
-    float sum = 0.0f;
-    for (int i = col; i < ncols; i += blockDim.x) {
-        sum += x[row * ncols + i];
-    }
-
-    sum = warp_reduce_sum(sum);
-
-    if (col == 0) {
-        dst[row] = sum;
-    }
-}
-
-void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
-    const dim3 block_dims(WARP_SIZE, 1, 1);
-    const dim3 block_nums(nrows, 1, 1);
-    k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
-}
-
-void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
-    const ggml_tensor * src0 = dst->src[0];
-    const float * src0_d = (const float *)src0->data;
-    float * dst_d = (float *)dst->data;
-    cudaStream_t stream = ctx.stream();
-
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(ggml_is_contiguous(src0));
-
-    const int64_t ncols = src0->ne[0];
-    const int64_t nrows = ggml_nrows(src0);
-
-    sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
-}
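
The launch shape above is one block per row with WARP_SIZE threads, so each thread strides across the row and a single warp-level reduction finishes the sum. warp_reduce_sum itself comes from common.cuh and is not part of this diff; upstream implements it as a shuffle butterfly, roughly as follows (reproduced from memory, illustrative rather than authoritative):

// Each lane exchanges its partial sum with lane ^ offset; after log2(32) = 5
// steps every lane of the warp holds the full 32-lane total.
static __device__ __forceinline__ float warp_reduce_sum(float x) {
#pragma unroll
    for (int offset = 16; offset > 0; offset >>= 1) {
        x += __shfl_xor_sync(0xffffffff, x, offset, 32);
    }
    return x;
}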

+ 0 - 31
llama/ggml-cuda/sumrows.cuh

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-#include "common.cuh"
-
-void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
-
-void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
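
This file and the thirty-odd siblings deleted below differ only in the DECL_FATTN_VEC_F16_CASE arguments (head size, K cache type, V cache type). One explicit instantiation per .cu file is a build tactic: nvcc can compile the flash-attention variants in parallel, and no single object file balloons. The macro is defined in fattn-vec-f16.cuh (outside this diff) and expands to an explicit template instantiation along these lines, sketched from memory:

// Illustrative only; the real definition lives in fattn-vec-f16.cuh.
#define DECL_FATTN_VEC_F16_CASE(D, type_K, type_V)              \
    template void ggml_cuda_flash_attn_ext_vec_f16_case         \
    <D, type_K, type_V>(ggml_backend_cuda_context & ctx, ggml_tensor * dst)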

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu

@@ -1,31 +0,0 @@
-/* MIT license header, identical to the one shown in full in quantize.cuh above */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);

+ 0 - 31
llama/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu

@@ -1,31 +0,0 @@
-/**
- * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
- *
- * MIT License
- *
- * Copyright (c) 2023-2024 The ggml authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);

Some files were not shown because too many files changed in this diff