1 year ago · 1524f323a3
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -95,8 +95,7 @@ jobs:
 
				           cd $env:GITHUB_WORKSPACE
			
 
				           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
			
 
				           $env:PATH="$gopath;$env:PATH"
			
 
				-          
			
 
				-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
			
 
				+          go generate -x ./...
			
 
				         name: go generate
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -1,16 +1,5 @@
 
				 name: test
			
 
				 
			
 
				-concurrency:
			
 
				-  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
			
 
				-  # cancels running CI jobs and starts all new ones.
			
 
				-  #
			
 
				-  # For non-PR pushes, concurrency.group needs to be unique for every distinct
			
 
				-  # CI run we want to have happen. Use run_id, which in practice means all
			
 
				-  # non-PR CI runs will be allowed to run without preempting each other.
			
 
				-  group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
			
 
				-  cancel-in-progress: true
			
 
				-
			
 
				-
			
 
				 on:
			
 
				   pull_request:
			
 
				     paths:
			
@@ -73,12 +62,10 @@ jobs:
 
				           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
			
 
				           $env:PATH="$gopath;$gccpath;$env:PATH"
			
 
				           echo $env:PATH
			
 
				-
			
 
				-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
			
 
				+          go generate -x ./...
			
 
				         if: ${{ startsWith(matrix.os, 'windows-') }}
			
 
				         name: 'Windows Go Generate'
			
 
				-      - run: |
			
 
				-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
			
 
				+      - run: go generate -x ./...
			
 
				         if: ${{ ! startsWith(matrix.os, 'windows-') }}
			
 
				         name: 'Unix Go Generate'
			
 
				       - uses: actions/upload-artifact@v4
			
@@ -111,7 +98,7 @@ jobs:
 
				       - run: go get ./...
			
 
				       - run: |
			
 
				           git config --global --add safe.directory /__w/ollama/ollama
			
 
				-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
			
 
				+          go generate -x ./...
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				       - uses: actions/upload-artifact@v4
			
@@ -142,7 +129,7 @@ jobs:
 
				       - run: go get ./...
			
 
				       - run: |
			
 
				           git config --global --add safe.directory /__w/ollama/ollama
			
 
				-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
			
 
				+          go generate -x ./...
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				       - uses: actions/upload-artifact@v4
			
@@ -181,9 +168,8 @@ jobs:
 
				           $env:PATH="$gopath;$env:PATH"
			
 
				           $env:OLLAMA_SKIP_CPU_GENERATE="1"
			
 
				           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
			
 
				-
			
 
				-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
			
 
				-        name: go run build.go
			
 
				+          go generate -x ./...
			
 
				+        name: go generate
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				       # TODO - do we need any artifacts?
			
@@ -216,7 +202,7 @@ jobs:
 
				       - name: 'Verify CUDA'
			
 
				         run: nvcc -V
			
 
				       - run: go get ./...
			
 
				-      - name: go run build.go
			
 
				+      - name: go generate
			
 
				         run: |
			
 
				           $gopath=(get-command go).source | split-path -parent
			
 
				           $cudabin=(get-command nvcc).source | split-path
			
@@ -225,8 +211,7 @@ jobs:
 
				           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
			
 
				           $env:PATH="$gopath;$cudabin;$env:PATH"
			
 
				           $env:OLLAMA_SKIP_CPU_GENERATE="1"
			
 
				-          
			
 
				-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
			
 
				+          go generate -x ./...
			
 
				         env:
			
 
				           OLLAMA_SKIP_CPU_GENERATE: '1'
			
 
				       # TODO - do we need any artifacts?
			
@@ -300,12 +285,6 @@ jobs:
 
				         with:
			
 
				           go-version-file: go.mod
			
 
				           cache: true
			
 
				-      - run: |
			
 
				-          GOARCH= go run build.go -f -d -target=${{ matrix.arch }}
			
 
				-        if: ${{ ! startsWith(matrix.os, 'windows-') }}
			
 
				-      - run: |
			
 
				-          $env:GOARCH = ""; go run build.go -f -d -target=${{ matrix.arch }}
			
 
				-        if: ${{ startsWith(matrix.os, 'windows-') }}
			
 
				       - run: go get
			
 
				       - run: |
			
 
				           case ${{ matrix.arch }} in
			
@@ -326,8 +305,9 @@ jobs:
 
				           touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server
			
 
				         if: ${{ startsWith(matrix.os, 'windows-') }}
			
 
				         shell: bash
			
 
				-      - run: |
			
 
				-          go test -v ./...
			
 
				+      - run: go generate ./...
			
 
				+      - run: go build
			
 
				+      - run: go test -v ./...
			
 
				       - uses: actions/upload-artifact@v4
			
 
				         with:
			
 
				           name: ${{ matrix.os }}-binaries
			
--- a/README.md
+++ b/README.md
@@ -201,10 +201,16 @@ Install `cmake` and `go`:
 
				 brew install cmake go
			
 
				 ```
			
 
				 
			
 
				+Then generate dependencies:
			
 
				+
			
 
				+```
			
 
				+go generate ./...
			
 
				+```
			
 
				+
			
 
				 Then build the binary:
			
 
				 
			
 
				 ```
			
 
				-go run build.go
			
 
				+go build .
			
 
				 ```
			
 
				 
			
 
				 More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
			
--- a/build.go
+++ b/build.go
@@ -1,192 +0,0 @@
 
				-//go:build ignore
			
 
				-
			
 
				-package main
			
 
				-
			
 
				-import (
			
 
				-	"cmp"
			
 
				-	"errors"
			
 
				-	"flag"
			
 
				-	"log"
			
 
				-	"os"
			
 
				-	"os/exec"
			
 
				-	"path/filepath"
			
 
				-	"runtime"
			
 
				-)
			
 
				-
			
 
				-// Flags
			
 
				-var (
			
 
				-	flagForce     = flag.Bool("f", false, "force re-generation of dependencies")
			
 
				-	flagSkipBuild = flag.Bool("d", false, "generate dependencies only (e.g. skip 'go build .')")
			
 
				-
			
 
				-	// Flags to set GOARCH and GOOS explicitly for cross-platform builds,
			
 
				-	// e.g., in CI to target a different platform than the build matrix
			
 
				-	// default. These allows us to run generate without a separate build
			
 
				-	// step for building the script binary for the host ARCH and then
			
 
				-	// runing the generate script for the target ARCH. Instead, we can
			
 
				-	// just run `go run build.go -target=$GOARCH` to generate the
			
 
				-	// deps.
			
 
				-	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
			
 
				-)
			
 
				-
			
 
				-func buildEnv() []string {
			
 
				-	return append(os.Environ(),
			
 
				-		"GOARCH="+cmp.Or(*flagGOARCH, runtime.GOARCH),
			
 
				-	)
			
 
				-}
			
 
				-
			
 
				-func main() {
			
 
				-	log.SetFlags(0)
			
 
				-	flag.Usage = func() {
			
 
				-		log.Printf("Usage: go run build.go [flags]")
			
 
				-		log.Println()
			
 
				-		log.Println("Flags:")
			
 
				-		flag.PrintDefaults()
			
 
				-		log.Println()
			
 
				-		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
			
 
				-		log.Println("bindings for the current platform. It assumes that the current working")
			
 
				-		log.Println("directory is the root directory of the Ollama project.")
			
 
				-		log.Println()
			
 
				-		log.Println("If the -d flag is provided, the script will only generate the dependencies")
			
 
				-		log.Println("and skip building the Ollama server binary.")
			
 
				-		log.Println()
			
 
				-		log.Println("If the -f flag is provided, the script will force re-generation of the")
			
 
				-		log.Println("dependencies.")
			
 
				-		log.Println()
			
 
				-		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
			
 
				-		log.Println("of the flag. This is useful for cross-platform builds.")
			
 
				-		log.Println()
			
 
				-		log.Println("The script will check for the required dependencies (cmake, gcc) and")
			
 
				-		log.Println("print their version.")
			
 
				-		log.Println()
			
 
				-		log.Println("The script will also check if it is being run from the root directory of")
			
 
				-		log.Println("the Ollama project.")
			
 
				-		log.Println()
			
 
				-		os.Exit(1)
			
 
				-	}
			
 
				-	flag.Parse()
			
 
				-
			
 
				-	log.Printf("=== Building Ollama ===")
			
 
				-	defer func() {
			
 
				-		log.Printf("=== Done building Ollama ===")
			
 
				-		log.Println()
			
 
				-		log.Println("To run the Ollama server, use:")
			
 
				-		log.Println()
			
 
				-		log.Println("    ./ollama serve")
			
 
				-		log.Println()
			
 
				-	}()
			
 
				-
			
 
				-	if flag.NArg() > 0 {
			
 
				-		flag.Usage()
			
 
				-	}
			
 
				-
			
 
				-	if !inRootDir() {
			
 
				-		log.Fatalf("Please run this script from the root directory of the Ollama project.")
			
 
				-	}
			
 
				-
			
 
				-	if err := checkDependencies(); err != nil {
			
 
				-		log.Fatalf("Failed dependency check: %v", err)
			
 
				-	}
			
 
				-	if err := buildLlammaCPP(); err != nil {
			
 
				-		log.Fatalf("Failed to build llama.cpp: %v", err)
			
 
				-	}
			
 
				-	if err := goBuildOllama(); err != nil {
			
 
				-		log.Fatalf("Failed to build ollama Go binary: %v", err)
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-// checkDependencies does a quick check to see if the required dependencies are
			
 
				-// installed on the system and functioning enough to print their version.
			
 
				-//
			
 
				-// TODO(bmizerany): Check the actual version of the dependencies? Seems a
			
 
				-// little daunting given diff versions might print diff things. This should
			
 
				-// be good enough for now.
			
 
				-func checkDependencies() error {
			
 
				-	var err error
			
 
				-	check := func(name string, args ...string) {
			
 
				-		log.Printf("=== Checking for %s ===", name)
			
 
				-		defer log.Printf("=== Done checking for %s ===\n\n", name)
			
 
				-		cmd := exec.Command(name, args...)
			
 
				-		cmd.Stdout = os.Stdout
			
 
				-		cmd.Stderr = os.Stderr
			
 
				-		err = errors.Join(err, cmd.Run())
			
 
				-	}
			
 
				-
			
 
				-	check("cmake", "--version")
			
 
				-	check("gcc", "--version")
			
 
				-	return err
			
 
				-}
			
 
				-
			
 
				-func goBuildOllama() error {
			
 
				-	log.Println("=== Building Ollama binary ===")
			
 
				-	defer log.Printf("=== Done building Ollama binary ===\n\n")
			
 
				-	if *flagSkipBuild {
			
 
				-		log.Println("Skipping 'go build -o ollama .'")
			
 
				-		return nil
			
 
				-	}
			
 
				-	cmd := exec.Command("go", "build", "-o", "ollama", ".")
			
 
				-	cmd.Stdout = os.Stdout
			
 
				-	cmd.Stderr = os.Stderr
			
 
				-	cmd.Env = buildEnv()
			
 
				-	return cmd.Run()
			
 
				-}
			
 
				-
			
 
				-// buildLlammaCPP generates the llama.cpp bindings for the current platform.
			
 
				-//
			
 
				-// It assumes that the current working directory is the root directory of the
			
 
				-// Ollama project.
			
 
				-func buildLlammaCPP() error {
			
 
				-	log.Println("=== Generating dependencies ===")
			
 
				-	defer log.Printf("=== Done generating dependencies ===\n\n")
			
 
				-	if *flagForce {
			
 
				-		if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
			
 
				-			return err
			
 
				-		}
			
 
				-	}
			
 
				-	if isDirectory(filepath.Join("llm", "build")) {
			
 
				-		log.Println("llm/build already exists; skipping.  Use -f to force re-generate.")
			
 
				-		return nil
			
 
				-	}
			
 
				-
			
 
				-	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
			
 
				-	if err != nil {
			
 
				-		return err
			
 
				-	}
			
 
				-
			
 
				-	var cmd *exec.Cmd
			
 
				-	switch runtime.GOOS {
			
 
				-	case "windows":
			
 
				-		script := filepath.Join(scriptDir, "gen_windows.ps1")
			
 
				-		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
			
 
				-	case "linux":
			
 
				-		script := filepath.Join(scriptDir, "gen_linux.sh")
			
 
				-		cmd = exec.Command("bash", script)
			
 
				-	case "darwin":
			
 
				-		script := filepath.Join(scriptDir, "gen_darwin.sh")
			
 
				-		cmd = exec.Command("bash", script)
			
 
				-	default:
			
 
				-		log.Fatalf("Unsupported OS: %s", runtime.GOOS)
			
 
				-	}
			
 
				-	cmd.Dir = filepath.Join("llm", "generate")
			
 
				-	cmd.Stdout = os.Stdout
			
 
				-	cmd.Stderr = os.Stderr
			
 
				-	cmd.Env = buildEnv()
			
 
				-
			
 
				-	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, runtime.GOARCH, cmd.Args)
			
 
				-
			
 
				-	return cmd.Run()
			
 
				-}
			
 
				-
			
 
				-func isDirectory(path string) bool {
			
 
				-	info, err := os.Stat(path)
			
 
				-	if err != nil {
			
 
				-		return false
			
 
				-	}
			
 
				-	return info.IsDir()
			
 
				-}
			
 
				-
			
 
				-// inRootDir returns true if the current working directory is the root
			
 
				-// directory of the Ollama project. It looks for a file named "go.mod".
			
 
				-func inRootDir() bool {
			
 
				-	_, err := os.Stat("go.mod")
			
 
				-	return err == nil
			
 
				-}
			
--- a/docs/development.md
+++ b/docs/development.md
@@ -23,23 +23,19 @@ export OLLAMA_DEBUG=1
 
				 Get the required libraries and build the native LLM code:
			
 
				 
			
 
				 ```bash
			
 
				-go run build.go
			
 
				+go generate ./...
			
 
				 ```
			
 
				 
			
 
				-Now you can run `ollama`:
			
 
				+Then build ollama:
			
 
				 
			
 
				 ```bash
			
 
				-./ollama
			
 
				+go build .
			
 
				 ```
			
 
				 
			
 
				-### Rebuilding the native code
			
 
				-
			
 
				-If at any point you need to rebuild the native code, you can run the
			
 
				-build.go script again using the `-f` flag to force a rebuild, and,
			
 
				-optionally, the `-d` flag to skip building the Go binary:
			
 
				+Now you can run `ollama`:
			
 
				 
			
 
				 ```bash
			
 
				-go run build.go -f -d
			
 
				+./ollama
			
 
				 ```
			
 
				 
			
 
				 ### Linux
			
@@ -57,10 +53,16 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 
				 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
			
 
				 set set of target CUDA architectues by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
			
 
				 
			
 
				+Then generate dependencies:
			
 
				+
			
 
				+```
			
 
				+go generate ./...
			
 
				+```
			
 
				+
			
 
				 Then build the binary:
			
 
				 
			
 
				 ```
			
 
				-go run build.go
			
 
				+go build .
			
 
				 ```
			
 
				 
			
 
				 #### Linux ROCm (AMD)
			
@@ -76,17 +78,21 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 
				 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
			
 
				 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
			
 
				 
			
 
				+```
			
 
				+go generate ./...
			
 
				+```
			
 
				+
			
 
				 Then build the binary:
			
 
				 
			
 
				 ```
			
 
				-go run build.go
			
 
				+go build .
			
 
				 ```
			
 
				 
			
 
				 ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
			
 
				 
			
 
				 #### Advanced CPU Settings
			
 
				 
			
 
				-By default, running `go run build.go` will compile a few different variations
			
 
				+By default, running `go generate ./...` will compile a few different variations
			
 
				 of the LLM library based on common CPU families and vector math capabilities,
			
 
				 including a lowest-common-denominator which should run on almost any 64 bit CPU
			
 
				 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
			
@@ -96,7 +102,8 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 
				 you might use:
			
 
				 
			
 
				 ```
			
 
				-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
			
 
				+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
			
 
				+go build .
			
 
				 ```
			
 
				 
			
 
				 #### Containerized Linux Build
			
@@ -117,7 +124,8 @@ Install required tools:
 
				 
			
 
				 ```powershell
			
 
				 $env:CGO_ENABLED="1"
			
 
				-go run build.go
			
 
				+go generate ./...
			
 
				+go build .
			
 
				 ```
			
 
				 
			
 
				 #### Windows CUDA (NVIDIA)
			
@@ -134,4 +142,4 @@ In addition to the common Windows development tools described above, install AMD
 
				 - [AMD HIP](https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html)
			
 
				 - [Strawberry Perl](https://strawberryperl.com/)
			
 
				 
			
 
				-Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
			
 
				+Lastly, add `ninja.exe` included with MSVC to the system path (e.g. `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\CommonExtensions\Microsoft\CMake\Ninja`).
			
--- a/llm/generate/gen_darwin.sh
+++ b/llm/generate/gen_darwin.sh
@@ -1,6 +1,6 @@
 
				 #!/bin/bash
			
 
				-# This script is intended to run inside the `go run build.go` script, which
			
 
				-# sets the working directory to the correct location: ./llm/generate/.
			
 
				+# This script is intended to be run by `go generate`, with the
			
 
				+# working directory set to ./llm/generate/
			
 
				 
			
 
				 # TODO - add hardening to detect missing tools (cmake, etc.)
			
 
				 
			
@@ -89,10 +89,10 @@ case "${GOARCH}" in
 
				     ;;
			
 
				 *)
			
 
				     echo "GOARCH must be set"
			
 
				-    echo "this script is meant to be run from within 'go run build.go'"
			
 
				+    echo "this script is meant to be run from within go generate"
			
 
				     exit 1
			
 
				     ;;
			
 
				 esac
			
 
				 
			
 
				 cleanup
			
 
				-echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
			
 
				+echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
			
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -1,6 +1,6 @@
 
				 #!/bin/bash
			
 
				-# This script is intended to run with the `go run build.go` script, which
			
 
				-# sets the working directory to the correct location: ./llm/generate/.
			
 
				+# This script is intended to be run by `go generate`, with the
			
 
				+# working directory set to llm/generate/
			
 
				 
			
 
				 # First we build one or more CPU based LLM libraries
			
 
				 #
			
@@ -237,4 +237,4 @@ if [ -d "${ROCM_PATH}" ]; then
 
				 fi
			
 
				 
			
 
				 cleanup
			
 
				-echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
			
 
				+echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
			
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -288,4 +288,4 @@ if ($null -ne $env:HIP_PATH) {
 
				 
			
 
				 
			
 
				 cleanup
			
 
				-write-host "`ncode generation completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
			
 
				+write-host "`ngo generate completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"
			
--- a/llm/generate/generate_darwin.go
+++ b/llm/generate/generate_darwin.go
@@ -0,0 +1,3 @@
 
				+package generate
			
 
				+
			
 
				+//go:generate bash ./gen_darwin.sh
			
--- a/llm/generate/generate_linux.go
+++ b/llm/generate/generate_linux.go
@@ -0,0 +1,3 @@
 
				+package generate
			
 
				+
			
 
				+//go:generate bash ./gen_linux.sh
			
--- a/llm/generate/generate_windows.go
+++ b/llm/generate/generate_windows.go
@@ -0,0 +1,3 @@
 
				+package generate
			
 
				+
			
 
				+//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1