introduce build.go for controlling distribution builds

This commit aims to provide the Ollama maintainers with maximum control
of the distribution build process by creating a cross-platform shim.

Currently, we have no flexibility or control over the process (pre and
post) or even the quality of the build.

By introducing a shim and propagating it out to Homebrew, et al., we
can soon ensure that the build process is consistent and reliable.

This also removes the requirement for go generate and the build tag
hacks, but it still supports go generate in the flow, at least until
the major distributions use the new build process and we can remove it.

About the script:

Beyond giving the Ollama maintainers drastically more control over the
build process, the script also provides a few other benefits:

- It is cross-platform, and can be run on any platform that supports Go
  (a hard requirement for building Ollama anyway).

- It can check for correct versions of cmake and other dependencies
  before starting the build process, and provide helpful error messages
  to the user if they are not met.

- It can be used to build the distribution for any platform,
  architecture, or build type (debug, release, etc.) with a single
  command; the current process takes two.

- It can skip parts of the build process if they are already done, such
  as building the C dependencies. Of course, there is a -d flag to force
  regeneration (see the example invocations below).

- So much more!
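
For reference, here is a quick sketch of example invocations, based on the
flags defined in build.go below:

```
go run build.go               # check deps, generate the llama.cpp bindings, build ./ollama
go run build.go -d            # remove llm/build and force regeneration of the dependencies
go run build.go -g -s         # regenerate dependencies in place and skip 'go build'
go run build.go -target=arm64 # set GOARCH when generating dependencies and building
```
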
Blake Mizerany · 1 year ago · commit 7893ccb68c

+ 10 - 7
.github/workflows/test.yaml

@@ -73,12 +73,12 @@ jobs:
           $env:CMAKE_SYSTEM_VERSION="10.0.22621.0"
           $env:PATH="$gopath;$gccpath;$env:PATH"
           echo $env:PATH
-          go generate -x ./...
+          $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
         if: ${{ startsWith(matrix.os, 'windows-') }}
-        name: 'Windows Go Generate'
-      - run: go generate -x ./...
+        name: 'Windows Generate'
+      - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
         if: ${{ ! startsWith(matrix.os, 'windows-') }}
-        name: 'Unix Go Generate'
+        name: 'Unix Generate'
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
@@ -184,7 +184,7 @@ jobs:
           $env:OLLAMA_SKIP_CPU_GENERATE="1"
           $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
           go generate -x ./...
-        name: go generate
+        name: go generate -x ./...
         env:
           OLLAMA_SKIP_CPU_GENERATE: '1'
       # TODO - do we need any artifacts?
@@ -217,7 +217,7 @@ jobs:
       - name: 'Verify CUDA'
         run: nvcc -V
       - run: go get ./...
-      - name: go generate
+      - name: go generate -x ./...
         run: |
           $gopath=(get-command go).source | split-path -parent
           $cudabin=(get-command nvcc).source | split-path
@@ -312,7 +312,10 @@ jobs:
           touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
         if: ${{ startsWith(matrix.os, 'macos-') }}
         shell: bash
-      - run: go generate ./...
+      - run: $env:GOARCH=""; $env:OLLAMA_BUILD_TARGET_ARCH="${{ matrix.arch }}"; go generate -x ./...
+        if: ${{ startsWith(matrix.os, 'windows-') }}
+      - run: GOARCH= OLLAMA_BUILD_TARGET_ARCH=${{ matrix.arch }} go generate -x ./...
+        if: ${{ ! startsWith(matrix.os, 'windows-') }}
       - run: go build
       - run: go test -v ./...
       - uses: actions/upload-artifact@v4

+ 29 - 46
README.md

@@ -1,12 +1,12 @@
 <div align="center">
- <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
+  <img alt="ollama" height="200px" src="https://github.com/ollama/ollama/assets/3325447/0d0b44e2-8f4a-4e99-9b52-a5c1c741c8f7">
 </div>
 
 # Ollama
 
 [![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
 
-Get up and running with large language models.
+Get up and running with large language models locally.
 
 ### macOS
 
@@ -51,17 +51,15 @@ Here are some example models that can be downloaded:
 | ------------------ | ---------- | ----- | ------------------------------ |
 | Llama 3            | 8B         | 4.7GB | `ollama run llama3`            |
 | Llama 3            | 70B        | 40GB  | `ollama run llama3:70b`        |
-| Phi 3 Mini         | 3.8B       | 2.3GB | `ollama run phi3`              |
-| Phi 3 Medium       | 14B        | 7.9GB | `ollama run phi3:medium`       |
-| Gemma 2            | 9B         | 5.5GB | `ollama run gemma2`            |
-| Gemma 2            | 27B        | 16GB  | `ollama run gemma2:27b`        |
+| Phi-3              | 3,8B       | 2.3GB | `ollama run phi3`              |
 | Mistral            | 7B         | 4.1GB | `ollama run mistral`           |
-| Moondream 2        | 1.4B       | 829MB | `ollama run moondream`         |
 | Neural Chat        | 7B         | 4.1GB | `ollama run neural-chat`       |
 | Starling           | 7B         | 4.1GB | `ollama run starling-lm`       |
 | Code Llama         | 7B         | 3.8GB | `ollama run codellama`         |
 | Llama 2 Uncensored | 7B         | 3.8GB | `ollama run llama2-uncensored` |
 | LLaVA              | 7B         | 4.5GB | `ollama run llava`             |
+| Gemma              | 2B         | 1.4GB | `ollama run gemma:2b`          |
+| Gemma              | 7B         | 4.8GB | `ollama run gemma:7b`          |
 | Solar              | 10.7B      | 6.1GB | `ollama run solar`             |
 
 > Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -175,19 +173,13 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
 The image features a yellow smiley face, which is likely the central focus of the picture.
 ```
 
-### Pass the prompt as an argument
+### Pass in prompt as arguments
 
 ```
 $ ollama run llama3 "Summarize this file: $(cat README.md)"
  Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
 ```
 
-### Show model information
-
-```
-ollama show llama3
-```
-
 ### List models on your computer
 
 ```
@@ -200,7 +192,19 @@ ollama list
 
 ## Building
 
-See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
+Install `cmake` and `go`:
+
+```
+brew install cmake go
+```
+
+Then build the binary:
+
+```
+go run build.go
+```
+
+More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
 
 ### Running local builds
 
@@ -248,7 +252,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Open WebUI](https://github.com/open-webui/open-webui)
 - [Enchanted (macOS native)](https://github.com/AugustDev/enchanted)
-- [Hollama](https://github.com/fmaclen/hollama)
 - [Lollms-Webui](https://github.com/ParisNeo/lollms-webui)
 - [LibreChat](https://github.com/danny-avila/LibreChat)
 - [Bionic GPT](https://github.com/bionic-gpt/bionic-gpt)
@@ -275,24 +278,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [OllamaGUI](https://github.com/enoch1118/ollamaGUI)
 - [OpenAOE](https://github.com/InternLM/OpenAOE)
 - [Odin Runes](https://github.com/leonid20000/OdinRunes)
-- [LLM-X](https://github.com/mrdjohnson/llm-x) (Progressive Web App)
+- [LLM-X: Progressive Web App](https://github.com/mrdjohnson/llm-x)
 - [AnythingLLM (Docker + MacOs/Windows/Linux native app)](https://github.com/Mintplex-Labs/anything-llm)
 - [Ollama Basic Chat: Uses HyperDiv Reactive UI](https://github.com/rapidarchitect/ollama_basic_chat)
 - [Ollama-chats RPG](https://github.com/drazdra/ollama-chats)
-- [QA-Pilot](https://github.com/reid41/QA-Pilot) (Chat with Code Repository)
-- [ChatOllama](https://github.com/sugarforever/chat-ollama) (Open Source Chatbot based on Ollama with Knowledge Bases)
-- [CRAG Ollama Chat](https://github.com/Nagi-ovo/CRAG-Ollama-Chat) (Simple Web Search with Corrective RAG)
-- [RAGFlow](https://github.com/infiniflow/ragflow) (Open-source Retrieval-Augmented Generation engine based on deep document understanding)
-- [StreamDeploy](https://github.com/StreamDeploy-DevRel/streamdeploy-llm-app-scaffold) (LLM Application Scaffold)
-- [chat](https://github.com/swuecho/chat) (chat web app for teams)
+- [QA-Pilot: Chat with Code Repository](https://github.com/reid41/QA-Pilot)
+- [ChatOllama: Open Source Chatbot based on Ollama with Knowledge Bases](https://github.com/sugarforever/chat-ollama)
+- [CRAG Ollama Chat: Simple Web Search with Corrective RAG](https://github.com/Nagi-ovo/CRAG-Ollama-Chat)
+- [RAGFlow: Open-source Retrieval-Augmented Generation engine based on deep document understanding](https://github.com/infiniflow/ragflow)
+- [chat: chat web app for teams](https://github.com/swuecho/chat)
 - [Lobe Chat](https://github.com/lobehub/lobe-chat) with [Integrating Doc](https://lobehub.com/docs/self-hosting/examples/ollama)
-- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
-- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
-- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
-- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
-- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
-- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
-- [Ollama with Google Mesop](https://github.com/rapidarchitect/ollama_mesop/) (Mesop Chat Client implementation with Ollama)
+- [Ollama RAG Chatbot: Local Chat with multiples PDFs using Ollama and RAG.](https://github.com/datvodinh/rag-chatbot.git)
 
 ### Terminal
 
@@ -315,7 +311,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ShellOracle](https://github.com/djcopley/ShellOracle)
 - [tlm](https://github.com/yusufcanb/tlm)
 - [podman-ollama](https://github.com/ericcurtin/podman-ollama)
-- [gollama](https://github.com/sammcj/gollama)
 
 ### Database
 
@@ -326,20 +321,17 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
-- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
 
 ### Libraries
 
 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
 - [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
-- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
 - [Ollama for Ruby](https://github.com/gbaptista/ollama-ai)
 - [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
-- [Ollama-hpp for C++](https://github.com/jmont-dev/ollama-hpp)
 - [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
@@ -350,13 +342,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Haystack](https://github.com/deepset-ai/haystack-integrations/blob/main/integrations/ollama.md)
 - [Elixir LangChain](https://github.com/brainlid/langchain)
 - [Ollama for R - rollama](https://github.com/JBGruber/rollama)
-- [Ollama for R - ollama-r](https://github.com/hauselin/ollama-r)
 - [Ollama-ex for Elixir](https://github.com/lebrunel/ollama-ex)
 - [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
 - [Testcontainers](https://testcontainers.com/modules/ollama/)
-- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
-- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
-- [LlamaScript](https://github.com/Project-Llama/llamascript)
 
 ### Mobile
 
@@ -376,23 +364,18 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Ollama Telegram Bot](https://github.com/ruecat/ollama-telegram)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
 - [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
+- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
 - [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)
 - [Cliobot](https://github.com/herval/cliobot) (Telegram bot with Ollama support)
 - [Copilot for Obsidian plugin](https://github.com/logancyang/obsidian-copilot)
 - [Obsidian Local GPT plugin](https://github.com/pfrankov/obsidian-local-gpt)
 - [Open Interpreter](https://docs.openinterpreter.com/language-model-setup/local-models/ollama)
-- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
-- [Ollama Copilot](https://github.com/bernardo-bruning/ollama-copilot) (Proxy that allows you to use ollama as a copilot like Github copilot)
 - [twinny](https://github.com/rjmacarthy/twinny) (Copilot and Copilot chat alternative using Ollama)
 - [Wingman-AI](https://github.com/RussellCanfield/wingman-ai) (Copilot code and chat alternative using Ollama and HuggingFace)
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
 - [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
-- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in python. Uses Ollama to create personalities.
-- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depends on ollama server)
-
-### Supported backends
-
-- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
 
+### Supported backends 
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov. 

+ 199 - 0
build.go

@@ -0,0 +1,199 @@
+//go:build ignore
+
+package main
+
+import (
+	"cmp"
+	"errors"
+	"flag"
+	"log"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+)
+
+// Flags
+var (
+	flagRegenerateDestroy = flag.Bool("d", false, "force regenerate the dependencies (destructive)")
+	flagRegenerateGently  = flag.Bool("g", false, "regenerate the dependencies (non-destructive)")
+	flagSkipBuild         = flag.Bool("s", false, "generate dependencies only (e.g. skip 'go build .')")
+
+	// Flag to set GOARCH explicitly for cross-platform builds,
+	// e.g., in CI to target a different platform than the build matrix
+	// default. This allows us to run generate without a separate build
+	// step for building the script binary for the host ARCH and then
+	// running the generate script for the target ARCH. Instead, we can
+	// just run `go run build.go -target=$GOARCH` to generate the
+	// deps.
+	flagGOARCH = flag.String("target", "", "sets GOARCH to use when generating dependencies and building")
+)
+
+func buildEnv() []string {
+	return append(os.Environ(), "GOARCH="+cmp.Or(
+		*flagGOARCH,
+		os.Getenv("OLLAMA_BUILD_TARGET_ARCH"),
+		runtime.GOARCH,
+	))
+}
+
+func main() {
+	log.SetFlags(0)
+	flag.Usage = func() {
+		log.Printf("Usage: go run build.go [flags]")
+		log.Println()
+		log.Println("Flags:")
+		flag.PrintDefaults()
+		log.Println()
+		log.Println("This script builds the Ollama server binary and generates the llama.cpp")
+		log.Println("bindings for the current platform. It assumes that the current working")
+		log.Println("directory is the root directory of the Ollama project.")
+		log.Println()
+		log.Println("If the -d flag is provided, the script will force regeneration of the")
+		log.Println("dependencies; removing the 'llm/build' directory before starting.")
+		log.Println()
+		log.Println("If the -g flag is provided, the script will regenerate the dependencies")
+		log.Println("without removing the 'llm/build' directory.")
+		log.Println()
+		log.Println("If the -s flag is provided, the script will skip building the Ollama binary")
+		log.Println()
+		log.Println("If the -target flag is provided, the script will set GOARCH to the value")
+		log.Println("of the flag. This is useful for cross-platform builds.")
+		log.Println()
+		log.Println("The script will check for the required dependencies (cmake, gcc) and")
+		log.Println("print their version.")
+		log.Println()
+		log.Println("The script will also check if it is being run from the root directory of")
+		log.Println("the Ollama project.")
+		log.Println()
+		os.Exit(1)
+	}
+	flag.Parse()
+
+	log.Printf("=== Building Ollama ===")
+	defer func() {
+		log.Printf("=== Done building Ollama ===")
+		if !*flagSkipBuild {
+			log.Println()
+			log.Println("To run the Ollama server, use:")
+			log.Println()
+			log.Println("    ./ollama serve")
+			log.Println()
+		}
+	}()
+
+	if flag.NArg() > 0 {
+		flag.Usage()
+	}
+
+	if !inRootDir() {
+		log.Fatalf("Please run this script from the root directory of the Ollama project.")
+	}
+
+	if err := checkDependencies(); err != nil {
+		log.Fatalf("Failed dependency check: %v", err)
+	}
+	if err := buildLlamaCPP(); err != nil {
+		log.Fatalf("Failed to build llama.cpp: %v", err)
+	}
+	if err := goBuildOllama(); err != nil {
+		log.Fatalf("Failed to build ollama Go binary: %v", err)
+	}
+}
+
+// checkDependencies does a quick check to see if the required dependencies are
+// installed on the system and functioning enough to print their version.
+//
+// TODO(bmizerany): Check the actual version of the dependencies? Seems a
+// little daunting given diff versions might print diff things. This should
+// be good enough for now.
+func checkDependencies() error {
+	var err error
+	check := func(name string, args ...string) {
+		log.Printf("=== Checking for %s ===", name)
+		defer log.Printf("=== Done checking for %s ===\n\n", name)
+		cmd := exec.Command(name, args...)
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		err = errors.Join(err, cmd.Run())
+	}
+
+	check("cmake", "--version")
+	check("gcc", "--version")
+	return err
+}
+
+func goBuildOllama() error {
+	log.Println("=== Building Ollama binary ===")
+	defer log.Printf("=== Done building Ollama binary ===\n\n")
+	if *flagSkipBuild {
+		log.Println("Skipping 'go build -o ollama .'")
+		return nil
+	}
+	cmd := exec.Command("go", "build", "-o", "ollama", ".")
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = buildEnv()
+	return cmd.Run()
+}
+
+// buildLlamaCPP generates the llama.cpp bindings for the current platform.
+//
+// It assumes that the current working directory is the root directory of the
+// Ollama project.
+func buildLlamaCPP() error {
+	log.Println("=== Generating dependencies ===")
+	defer log.Printf("=== Done generating dependencies ===\n\n")
+	if *flagRegenerateDestroy {
+		if err := os.RemoveAll(filepath.Join("llm", "build")); err != nil {
+			return err
+		}
+	}
+	if isDirectory(filepath.Join("llm", "build")) && !*flagRegenerateGently {
+		log.Println("llm/build already exists; skipping.  Use -d or -g to re-generate.")
+		return nil
+	}
+
+	scriptDir, err := filepath.Abs(filepath.Join("llm", "generate"))
+	if err != nil {
+		return err
+	}
+
+	var cmd *exec.Cmd
+	switch runtime.GOOS {
+	case "windows":
+		script := filepath.Join(scriptDir, "gen_windows.ps1")
+		cmd = exec.Command("powershell", "-ExecutionPolicy", "Bypass", "-File", script)
+	case "linux":
+		script := filepath.Join(scriptDir, "gen_linux.sh")
+		cmd = exec.Command("bash", script)
+	case "darwin":
+		script := filepath.Join(scriptDir, "gen_darwin.sh")
+		cmd = exec.Command("bash", script)
+	default:
+		log.Fatalf("Unsupported OS: %s", runtime.GOOS)
+	}
+	cmd.Dir = filepath.Join("llm", "generate")
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	cmd.Env = buildEnv()
+
+	log.Printf("Running GOOS=%s GOARCH=%s %s", runtime.GOOS, runtime.GOARCH, cmd.Args)
+
+	return cmd.Run()
+}
+
+func isDirectory(path string) bool {
+	info, err := os.Stat(path)
+	if err != nil {
+		return false
+	}
+	return info.IsDir()
+}
+
+// inRootDir returns true if the current working directory is the root
+// directory of the Ollama project. It looks for a file named "go.mod".
+func inRootDir() bool {
+	_, err := os.Stat("go.mod")
+	return err == nil
+}
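
A note on architecture selection: buildEnv resolves GOARCH from the -target
flag, then the OLLAMA_BUILD_TARGET_ARCH environment variable, then the host's
runtime.GOARCH. So, as a rough sketch, either of the following should generate
the arm64 dependencies from an amd64 host (the second mirrors how the CI
workflow above drives go generate):

```
go run build.go -target=arm64
GOARCH= OLLAMA_BUILD_TARGET_ARCH=arm64 go generate -x ./...
```
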

+ 14 - 22
docs/development.md

@@ -25,19 +25,23 @@ export OLLAMA_DEBUG=1
 Get the required libraries and build the native LLM code:
 
 ```bash
-go generate ./...
+go run build.go
 ```
 
-Then build ollama:
+Now you can run `ollama`:
 
 ```bash
-go build .
+./ollama
 ```
 
-Now you can run `ollama`:
+### Rebuilding the native code
+
+If at any point you need to rebuild the native code, you can run the
+build.go script again with the `-d` flag to force regeneration of the
+dependencies and, optionally, the `-s` flag to skip building the Go binary:
 
 ```bash
-./ollama
+go run build.go -d -s
 ```
 
 ### Linux
@@ -55,16 +59,10 @@ specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
 libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
 a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
 
-Then generate dependencies:
-
-```
-go generate ./...
-```
-
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 #### Linux ROCm (AMD)
@@ -80,21 +78,17 @@ install (typically `/opt/rocm`), and `CLBlast_DIR` to the location of the
 CLBlast install (typically `/usr/lib/cmake/CLBlast`). You can also customize
 the AMD GPU targets by setting AMDGPU_TARGETS (e.g. `AMDGPU_TARGETS="gfx1101;gfx1102"`)
 
-```
-go generate ./...
-```
-
 Then build the binary:
 
 ```
-go build .
+go run build.go
 ```
 
 ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
 
 #### Advanced CPU Settings
 
-By default, running `go generate ./...` will compile a few different variations
+By default, running `go run build.go` will compile a few different variations
 of the LLM library based on common CPU families and vector math capabilities,
 including a lowest-common-denominator which should run on almost any 64 bit CPU
 somewhat slowly. At runtime, Ollama will auto-detect the optimal variation to
@@ -104,8 +98,7 @@ like to use. For example, to compile an optimized binary for an Intel i9-9880H,
 you might use:
 
 ```
-OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
-go build .
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go run build.go
 ```
 
 #### Containerized Linux Build
@@ -129,8 +122,7 @@ Then, build the `ollama` binary:
 
 ```powershell
 $env:CGO_ENABLED="1"
-go generate ./...
-go build .
+go run build.go
 ```
 
 #### Windows CUDA (NVIDIA)

+ 4 - 4
llm/generate/gen_darwin.sh

@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be ./llm/generate/
+# This script is intended to run inside the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # TODO - add hardening to detect missing tools (cmake, etc.)
 
@@ -92,10 +92,10 @@ case "${GOARCH}" in
     ;;
 *)
     echo "GOARCH must be set"
-    echo "this script is meant to be run from within go generate"
+    echo "this script is meant to be run from within 'go run build.go'"
     exit 1
     ;;
 esac
 
 cleanup
-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

+ 3 - 3
llm/generate/gen_linux.sh

@@ -1,6 +1,6 @@
 #!/bin/bash
-# This script is intended to run inside the go generate
-# working directory must be llm/generate/
+# This script is intended to run with the `go run build.go` script, which
+# sets the working directory to the correct location: ./llm/generate/.
 
 # First we build one or more CPU based LLM libraries
 #
@@ -281,4 +281,4 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
 fi
 
 cleanup
-echo "go generate completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
+echo "code generation completed.  LLM runners: $(cd ${BUILD_DIR}/..; echo *)"

+ 120 - 252
llm/generate/gen_windows.ps1

@@ -26,26 +26,15 @@ function amdGPUs {
     $GPU_LIST -join ';'
 }
 
-
 function init_vars {
-    if (!$script:SRC_DIR) {
-        $script:SRC_DIR = $(resolve-path "..\..\")
-    }
-    if (!$script:llamacppDir) {
-        $script:llamacppDir = "../llama.cpp"
-    }
-    if (!$script:cmakeTargets) {
-        $script:cmakeTargets = @("ollama_llama_server")
-    }
+    $script:SRC_DIR = $(resolve-path "..\..\")
+    $script:llamacppDir = "../llama.cpp"
     $script:cmakeDefs = @(
         "-DBUILD_SHARED_LIBS=on",
-        "-DLLAMA_NATIVE=off",
-        "-DLLAMA_OPENMP=off"
+        "-DLLAMA_NATIVE=off"
         )
-    $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
-    $script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
-    $script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
-    md "$script:DIST_BASE" -ea 0 > $null
+    $script:cmakeTargets = @("ollama_llama_server")
+    $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
         $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
         $script:config = "RelWithDebInfo"
@@ -66,6 +55,7 @@ function init_vars {
     } else {
         $script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
     }
+    $script:GZIP=(get-command -ea 'silentlycontinue' gzip).path
     $script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
     if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) {
         $script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
@@ -123,13 +113,8 @@ function build {
     & cmake --version
     & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
-    if ($cmakeDefs -contains "-G") {
-        $extra=@("-j8")
-    } else {
-        $extra= @("--", "/p:CL_MPcount=8")
-    }
-    write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
-    & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
+    write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ })"
+    & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ })
     if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
     # Rearrange output to be consistent between different generators
     if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) {
@@ -149,18 +134,21 @@ function sign {
     }
 }
 
-function install {
-    write-host "Installing binaries to dist dir ${script:distDir}"
-    mkdir ${script:distDir} -ErrorAction SilentlyContinue
+function compress {
+    if ($script:GZIP -eq $null) {
+        write-host "gzip not installed, not compressing files"
+        return
+    }
+    write-host "Compressing binaries..."
     $binaries = dir "${script:buildDir}/bin/*.exe"
     foreach ($file in $binaries) {
-        copy-item -Path $file -Destination ${script:distDir} -Force
+        & "$script:GZIP" --best -f $file
     }
 
-    write-host "Installing dlls to dist dir ${script:distDir}"
+    write-host "Compressing dlls..."
     $dlls = dir "${script:buildDir}/bin/*.dll"
     foreach ($file in $dlls) {
-        copy-item -Path $file -Destination ${script:distDir} -Force
+        & "$script:GZIP" --best -f $file
     }
 }
 
@@ -181,252 +169,132 @@ function cleanup {
     }
 }
 
+init_vars
+git_module_setup
+apply_patches
 
 # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
 # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
 
+$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
 
-function build_static() {
-    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
-        # GCC build for direct linking into the Go binary
-        init_vars
-        # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
-        # as we need this to be compiled by gcc for golang to be able to link with itx
-        write-host "Checking for MinGW..."
-        # error action ensures we exit on failure
-        get-command gcc
-        get-command mingw32-make
-        $oldTargets = $script:cmakeTargets
-        $script:cmakeTargets = @("llama", "ggml")
-        $script:cmakeDefs = @(
-            "-G", "MinGW Makefiles"
-            "-DCMAKE_C_COMPILER=gcc.exe",
-            "-DCMAKE_CXX_COMPILER=g++.exe",
-            "-DBUILD_SHARED_LIBS=off",
-            "-DLLAMA_NATIVE=off",
-            "-DLLAMA_AVX=off",
-            "-DLLAMA_AVX2=off",
-            "-DLLAMA_AVX512=off",
-            "-DLLAMA_F16C=off",
-            "-DLLAMA_FMA=off",
-            "-DLLAMA_OPENMP=off")
-        $script:buildDir="../build/windows/${script:ARCH}_static"
-        write-host "Building static library"
-        build
-        $script:cmakeTargets = $oldTargets
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
-}
+if ($null -eq ${env:OLLAMA_SKIP_CPU_GENERATE}) {
 
-function build_cpu($gen_arch) {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
-        # remaining llama.cpp builds use MSVC 
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu"
-        $script:distDir="$script:DIST_BASE\cpu"
-        write-host "Building LCD CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU generation step as requested"
-    }
-}
+# GCC build for direct linking into the Go binary
+init_vars
+# cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
+# as we need this to be compiled by gcc for golang to be able to link with itx
+write-host "Checking for MinGW..."
+# error action ensures we exit on failure
+get-command gcc
+get-command mingw32-make
+$script:cmakeTargets = @("llama", "ggml")
+$script:cmakeDefs = @(
+    "-G", "MinGW Makefiles"
+    "-DCMAKE_C_COMPILER=gcc.exe",
+    "-DCMAKE_CXX_COMPILER=g++.exe",
+    "-DBUILD_SHARED_LIBS=off",
+    "-DLLAMA_NATIVE=off",
+    "-DLLAMA_AVX=off",
+    "-DLLAMA_AVX2=off",
+    "-DLLAMA_AVX512=off",
+    "-DLLAMA_F16C=off",
+    "-DLLAMA_FMA=off")
+$script:buildDir="../build/windows/${script:ARCH}_static"
+write-host "Building static library"
+build
+
+# remaining llama.cpp builds use MSVC 
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu"
+    write-host "Building LCD CPU"
+    build
+    sign
+    compress
 
-function build_cpu_avx() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
-        $script:distDir="$script:DIST_BASE\cpu_avx"
-        write-host "Building AVX CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX generation step as requested"
-    }
-}
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
+    write-host "Building AVX CPU"
+    build
+    sign
+    compress
 
-function build_cpu_avx2() {
-    if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
-        init_vars
-        $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
-        $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
-        $script:distDir="$script:DIST_BASE\cpu_avx2"
-        write-host "Building AVX2 CPU"
-        build
-        sign
-        install
-    } else {
-        write-host "Skipping CPU AVX2 generation step as requested"
-    }
+    init_vars
+    $script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
+    $script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
+    write-host "Building AVX2 CPU"
+    build
+    sign
+    compress
+} else {
+    write-host "Skipping CPU generation step as requested"
 }
 
-function build_cuda() {
-    if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
-        # Then build cuda as a dynamically loaded library
-        $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
-        $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
-        if ($null -ne $script:CUDA_VERSION) {
-            $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
-        }
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
-        $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
-        $script:cmakeDefs += @(
-            "-A", "x64",
-            "-DLLAMA_CUDA=ON",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
-            "-DCMAKE_CUDA_FLAGS=-t8",
-            "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
-            )
-        if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
-            write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
-            $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
-            write-host "building custom CUDA GPU"
-        }
-        build
-        sign
-        install
-
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
-        write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-        cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
-    } else {
-        write-host "Skipping CUDA generation step"
+if ($null -ne $script:CUDA_LIB_DIR) {
+    # Then build cuda as a dynamically loaded library
+    $nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
+    $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+    if ($null -ne $script:CUDA_VERSION) {
+        $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
+    }
+    init_vars
+    $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+    $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}")
+    if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
+        write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
+        $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
+        write-host "building custom CUDA GPU"
     }
+    build
+    sign
+    compress
 }
 
-function build_oneapi() {
-  if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}"))  {
-    # Get oneAPI version
-    $script:ONEAPI_VERSION = icpx --version
-    $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
-    if ($null -ne $script:ONEAPI_VERSION) {
-      $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
+if ($null -ne $env:HIP_PATH) {
+    $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
+    if ($null -ne $script:ROCM_VERSION) {
+        $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
     }
+
     init_vars
-    $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
-    $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
+    $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
     $script:cmakeDefs += @(
-      "-G", "MinGW Makefiles",
-      "-DLLAMA_SYCL=ON",
-      "-DCMAKE_C_COMPILER=icx",
-      "-DCMAKE_CXX_COMPILER=icx",
-      "-DCMAKE_BUILD_TYPE=Release"
-    )
+        "-G", "Ninja", 
+        "-DCMAKE_C_COMPILER=clang.exe",
+        "-DCMAKE_CXX_COMPILER=clang++.exe",
+        "-DLLAMA_HIPBLAS=on",
+        "-DHIP_PLATFORM=amd",
+        "-DLLAMA_AVX=on",
+        "-DLLAMA_AVX2=off",
+        "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
+        "-DAMDGPU_TARGETS=$(amdGPUs)",
+        "-DGPU_TARGETS=$(amdGPUs)"
+        )
 
-    Write-Host "Building oneAPI"
+    # Make sure the ROCm binary dir is first in the path
+    $env:PATH="$env:HIP_PATH\bin;$env:PATH"
+
+    # We have to clobber the LIB var from the developer shell for clang to work properly
+    $env:LIB=""
+    if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
+        write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
+        $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
+        write-host "building custom ROCM GPU"
+    }
+    write-host "Building ROCm"
     build
     # Ninja doesn't prefix with config name
+    ${script:config}=""
     if ($null -ne $script:DUMPBIN) {
-      & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
     }
     sign
-    install
-
-    rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-    cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
-  } else {
-    Write-Host "Skipping oneAPI generation step"
-  }
+    compress
 }
 
-function build_rocm() {
-    if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
-        $script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
-        if ($null -ne $script:ROCM_VERSION) {
-            $script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
-        }
-
-        init_vars
-        $script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
-        $script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
-        $script:cmakeDefs += @(
-            "-G", "Ninja", 
-            "-DCMAKE_C_COMPILER=clang.exe",
-            "-DCMAKE_CXX_COMPILER=clang++.exe",
-            "-DLLAMA_HIPBLAS=on",
-            "-DHIP_PLATFORM=amd",
-            "-DLLAMA_AVX=on",
-            "-DLLAMA_AVX2=off",
-            "-DCMAKE_POSITION_INDEPENDENT_CODE=on",
-            "-DAMDGPU_TARGETS=$(amdGPUs)",
-            "-DGPU_TARGETS=$(amdGPUs)"
-            )
-
-        # Make sure the ROCm binary dir is first in the path
-        $env:PATH="$env:HIP_PATH\bin;$env:PATH"
-
-        # We have to clobber the LIB var from the developer shell for clang to work properly
-        $env:LIB=""
-        if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
-            write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
-            $script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
-            write-host "building custom ROCM GPU"
-        }
-        write-host "Building ROCm"
-        build
-        # Ninja doesn't prefix with config name
-        ${script:config}=""
-        if ($null -ne $script:DUMPBIN) {
-            & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
-        }
-        sign
-        install
-
-        # Assumes v5.7, may need adjustments for v6
-        rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
-        cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
-        # amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
-        cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
-    } else {
-        write-host "Skipping ROCm generation step"
-    }
-}
 
-init_vars
-if ($($args.count) -eq 0) {
-    git_module_setup
-    apply_patches
-    build_static
-    if ($script:ARCH -eq "arm64") {
-        build_cpu("ARM64")
-    } else { # amd64
-        build_cpu("x64")
-        build_cpu_avx
-        build_cpu_avx2
-        build_cuda
-        build_oneapi
-        build_rocm
-    }
-
-    cleanup
-    write-host "`ngo generate completed.  LLM runners: $(get-childitem -path $script:DIST_BASE)"
-} else {
-    for ( $i = 0; $i -lt $args.count; $i++ ) {
-        write-host "performing $($args[$i])"
-        & $($args[$i])
-    } 
-}
+cleanup
+write-host "`ncode generation completed.  LLM runners: $(get-childitem -path ${script:SRC_DIR}\llm\build\windows\${script:ARCH})"

+ 0 - 3
llm/generate/generate_darwin.go

@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_darwin.sh

+ 0 - 3
llm/generate/generate_linux.go

@@ -1,3 +0,0 @@
-package generate
-
-//go:generate bash ./gen_linux.sh

+ 0 - 3
llm/generate/generate_windows.go

@@ -1,3 +0,0 @@
-package generate
-
-//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1

+ 2 - 0
main.go

@@ -1,5 +1,7 @@
 package main
 
+//go:generate go run build.go -g -s
+
 import (
 	"context"