
Build multiple CPU variants and pick the best

This reduces the built-in Linux build to not use any vector extensions,
which enables the resulting builds to run under Rosetta on macOS in
Docker.  At runtime, it then checks the actual CPU's vector
extensions and loads the best CPU library available.
Daniel Hiltgen 1 year ago
parent
commit
d88c527be3

+ 2 - 0
Dockerfile.build

@@ -49,6 +49,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
+ARG OLLAMA_CUSTOM_CPU_DEFS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@@ -59,6 +60,7 @@ RUN sh gen_linux.sh
 FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
+ARG OLLAMA_CUSTOM_CPU_DEFS
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
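
The new `OLLAMA_CUSTOM_CPU_DEFS` build argument is plumbed through from `scripts/build_linux.sh` (see the last hunk below), so containerized builds can also be limited to a single custom CPU variant.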

+ 16 - 0
docs/development.md

@@ -76,6 +76,22 @@ go build .
 
 ROCm requires elevated privileges to access the GPU at runtime.  On most distros you can add your user account to the `render` group, or run as root.
 
+#### Advanced CPU Settings
+
+By default, running `go generate ./...` will compile a few different variations
+of the LLM library based on common CPU families and vector math capabilities,
+including a lowest-common-denominator build which should run on almost any
+64-bit CPU, if somewhat slowly.  At runtime, Ollama will auto-detect the best
+variation to load.  If you would like a CPU build customized for your
+processor, you can set `OLLAMA_CUSTOM_CPU_DEFS` to the llama.cpp flags you
+would like to use.  For example, to compile an optimized binary for an Intel
+i9-9880H, you might use:
+
+```
+OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_F16C=on -DLLAMA_FMA=on" go generate ./...
+go build .
+```
+
 #### Containerized Linux Build
 
 If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included.  The resulting binary is placed in `./dist`
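
To pick sensible flags, it helps to check which vector extensions your processor actually reports; the `cat /proc/cpuinfo | grep flags | head -1` command shown in docs/troubleshooting.md below is one way to do that (look for `avx`, `avx2`, `f16c`, and `fma` in the output).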

+ 33 - 2
docs/troubleshooting.md

@@ -16,7 +16,38 @@ If manually running `ollama serve` in a terminal, the logs will be on that termi
 
 Join the [Discord](https://discord.gg/ollama) for help interpreting the logs.
 
-## Known issues
+## LLM libraries
+
+Ollama includes multiple LLM libraries compiled for different GPUs and CPU
+vector features.  Ollama tries to pick the best one based on the capabilities
+of your system.  If this autodetection has problems, or you run into other
+issues (e.g. crashes in your GPU), you can work around them by forcing a
+specific LLM library.  `cpu_avx2` will perform the best, followed by `cpu_avx`;
+the slowest but most compatible is `cpu`.  Rosetta emulation under macOS will
+work with the `cpu` library.
+
+In the server log, you will see a message that looks something like this (varies
+from release to release):
+
+```
+Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
+```
+
+**Experimental LLM Library Override**
+
+You can set `OLLAMA_LLM_LIBRARY` to any of the available LLM libraries to
+bypass autodetection.  For example, if you have a CUDA card but want to force
+the CPU LLM library with AVX2 vector support, use:
 
+```
+OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
+```
+
+You can see what features your CPU has with the following command:
+```
+cat /proc/cpuinfo | grep flags | head -1
+```
+
+## Known issues
 
-* `signal: illegal instruction (core dumped)`: Ollama requires AVX support from the CPU. This was introduced in 2011 and CPUs started offering it in 2012. CPUs from before that and some lower end CPUs after that may not have AVX support and thus are not supported by Ollama. Some users have had luck with building Ollama on their machines disabling the need for AVX.
+* N/A

+ 2 - 6
gpu/gpu.go

@@ -158,12 +158,8 @@ func GetGPUInfo() GpuInfo {
 	}
 	if resp.Library == "" {
 		C.cpu_check_ram(&memInfo)
-		// In the future we may offer multiple CPU variants to tune CPU features
-		if runtime.GOOS == "windows" {
-			resp.Library = "cpu"
-		} else {
-			resp.Library = "default"
-		}
+		resp.Library = "cpu"
+		resp.Variant = GetCPUVariant()
 	}
 	if memInfo.err != nil {
 		log.Printf("error looking up CPU memory: %s", C.GoString(memInfo.err))

+ 5 - 0
gpu/gpu_darwin.go

@@ -49,3 +49,8 @@ func getCPUMem() (memInfo, error) {
 func nativeInit() error {
 	return nil
 }
+
+func GetCPUVariant() string {
+	// We don't yet have CPU based builds for Darwin...
+	return ""
+}
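
The non-Darwin side of `GetCPUVariant` is not shown in this excerpt. As a rough, hypothetical sketch only (the commit's actual detector may differ), runtime selection between the variants built here could use `golang.org/x/sys/cpu`:

```
package gpu

import "golang.org/x/sys/cpu"

// Hypothetical sketch: return the most capable cpu_* library variant this
// processor can run; an empty string selects the lowest-common-denominator
// "cpu" build (safe even under Rosetta, which lacks AVX).
func GetCPUVariant() string {
	if cpu.X86.HasAVX2 {
		return "avx2"
	}
	if cpu.X86.HasAVX {
		return "avx"
	}
	return ""
}
```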

+ 4 - 2
llm/ext_server_windows.go → llm/ext_server.go

@@ -1,3 +1,5 @@
+//go:build !darwin
+
 package llm
 
 import (
@@ -7,9 +9,9 @@ import (
 )
 
 func newDefaultExtServer(model string, adapters, projectors []string, opts api.Options) (extServer, error) {
-	// On windows we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies
+	// On windows and linux we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies
 	// This ensures we can update the PATH at runtime to get everything loaded
 
 	// This should never happen as we'll always try to load one or more cpu dynamic libraries before hitting default
-	return nil, fmt.Errorf("no available default llm library on windows")
+	return nil, fmt.Errorf("no available default llm library")
 }

+ 2 - 6
llm/ext_server_common.go

@@ -15,12 +15,6 @@ package llm
 #cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
 #cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
 #cgo linux CFLAGS: -D_GNU_SOURCE
-#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
-#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libext_server.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libcommon.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libllama.a
-#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/build/linux/cpu/lib/libggml_static.a
 #cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
 #cgo linux windows LDFLAGS: -lpthread
 
@@ -43,6 +37,8 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
+// TODO switch Linux to always be dynamic
+// If that works out, then look at the impact of doing the same for Mac
 type extServer interface {
 	LLM
 	llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t)

+ 3 - 1
llm/ext_server_default.go

@@ -1,4 +1,4 @@
-//go:build !windows
+//go:build darwin
 
 package llm
 
@@ -14,6 +14,8 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
+// TODO - explore shifting Darwin to a dynamic loading pattern for consistency with Linux and Windows
+
 type llamaExtServer struct {
 	api.Options
 }

+ 10 - 0
llm/generate/gen_common.sh

@@ -51,6 +51,16 @@ install() {
     cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib
 }
 
+link_server_lib() {
+    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
+        -Wl,--whole-archive \
+        ${BUILD_DIR}/lib/libext_server.a \
+        -Wl,--no-whole-archive \
+        ${BUILD_DIR}/lib/libcommon.a \
+        ${BUILD_DIR}/lib/libllama.a
+}
+
 # Keep the local tree clean after we're done with the build
 cleanup() {
     (cd ${LLAMACPP_DIR}/examples/server/ && git checkout CMakeLists.txt server.cpp)
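
A note on the `-Wl,--whole-archive` / `-Wl,--no-whole-archive` bracketing: it forces the linker to keep every object from `libext_server.a` in the shared library, presumably so that entry points resolved later via `dlopen`/`dlsym` are not discarded as unreferenced, while `libcommon.a` and `libllama.a` are linked on demand as usual.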

+ 60 - 9
llm/generate/gen_linux.sh

@@ -49,17 +49,68 @@ git_module_setup
 apply_patches
 
 if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
-    #
-    # CPU first for the default library
-    #
-    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
+    # Users building from source can tune the exact flags we pass to cmake for configuring
+    # llama.cpp, and we'll build only one CPU variant in that case as the default.
+    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
+        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
+        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
+        echo "Building custom CPU"
+        build
+        install
+        link_server_lib
+    else
+        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
+        # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
+        # -DLLAMA_F16C -- 2012 Intel Ivy Bridge & 2011 AMD Bulldozer (No significant improvement over just AVX)
+        # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
+        # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
+        # Note: the following seem to yield slower results than AVX2 - ymmv
+        # -DLLAMA_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
+        # -DLLAMA_AVX512_VBMI -- 2018 Intel Cannon Lake
+        # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
 
-    build
-    install
+        COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
+        #
+        # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
+        #
+        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
+        echo "Building LCD CPU"
+        build
+        install
+        link_server_lib
 
-    # Placeholder to keep go embed happy until we start building dynamic CPU lib variants
-    touch ${BUILD_DIR}/lib/dummy.so
+        #
+        # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
+        # Approximately 400% faster than LCD on same CPU
+        #
+        init_vars
+        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx"
+        echo "Building AVX CPU"
+        build
+        install
+        link_server_lib
+
+        #
+        # ~2013 CPU Dynamic library
+        # Approximately 10% faster than AVX on same CPU
+        #
+        init_vars
+        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2"
+        echo "Building AVX2 CPU"
+        build
+        install
+        link_server_lib
+    fi
 else
     echo "Skipping CPU generation step as requested"
 fi
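
The three resulting build directories (`cpu`, `cpu_avx`, `cpu_avx2`) correspond to the library names listed in docs/troubleshooting.md above, and to the variants that `GetCPUVariant` chooses between at runtime.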

+ 16 - 1
llm/llm.go

@@ -139,7 +139,22 @@ func Init(workdir string) error {
 }
 
 func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
-	for _, shim := range getShims(gpuInfo) {
+	shims := getShims(gpuInfo)
+
+	// Check to see if the user has requested a specific library instead of auto-detecting
+	demandLib := os.Getenv("OLLAMA_LLM_LIBRARY")
+	if demandLib != "" {
+		libPath := availableShims[demandLib]
+		if libPath == "" {
+			log.Printf("Invalid OLLAMA_LLM_LIBRARY %s - not found", demandLib)
+		} else {
+			log.Printf("Loading OLLAMA_LLM_LIBRARY=%s", demandLib)
+			shims = []string{libPath}
+		}
+	}
+
+	for _, shim := range shims {
+		// TODO - only applies on Darwin (switch to fully dynamic there too...)
 		if shim == "default" {
 			break
 		}
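
Note that an unrecognized `OLLAMA_LLM_LIBRARY` value is only logged as invalid and otherwise ignored, so the server falls back to the auto-detected shim list rather than failing outright.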

+ 37 - 20
llm/shim.go

@@ -15,14 +15,20 @@ import (
 	"github.com/jmorganca/ollama/gpu"
 )
 
-// Shims names may contain an optional variant separated by '_'
+// Library names may contain an optional variant separated by '_'
 // For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2"
+// Any library without a variant is the lowest common denominator
 var availableShims = map[string]string{}
 
 const pathComponentCount = 6
 
 // getShims returns an ordered list of shims to try, starting with the best
 func getShims(gpuInfo gpu.GpuInfo) []string {
+	// Short circuit if we know we're using the default built-in (darwin only)
+	if gpuInfo.Library == "default" {
+		return []string{"default"}
+	}
+
 	exactMatch := ""
 	shims := []string{}
 	altShims := []string{}
@@ -30,30 +36,18 @@ func getShims(gpuInfo gpu.GpuInfo) []string {
 	if gpuInfo.Variant != "" {
 		requested += "_" + gpuInfo.Variant
 	}
-	// First try to find an exact match
+	// Try to find an exact match
 	for cmp := range availableShims {
 		if requested == cmp {
 			exactMatch = cmp
-			shims = append(shims, availableShims[cmp])
+			shims = []string{availableShims[cmp]}
 			break
 		}
 	}
-	// Then load alternates and sort the list for consistent load ordering
-	for cmp := range availableShims {
-		if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
-			altShims = append(altShims, cmp)
-		}
-	}
-	slices.Sort(altShims)
-	for _, altShim := range altShims {
-		shims = append(shims, availableShims[altShim])
-	}
-
-	// Load up the CPU alternates if not primary requested
+	// Then for GPUs load alternates and sort the list for consistent load ordering
 	if gpuInfo.Library != "cpu" {
-		altShims = []string{}
 		for cmp := range availableShims {
-			if strings.Split(cmp, "_")[0] == "cpu" {
+			if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch {
 				altShims = append(altShims, cmp)
 			}
 		}
@@ -62,8 +56,30 @@ func getShims(gpuInfo gpu.GpuInfo) []string {
 			shims = append(shims, availableShims[altShim])
 		}
 	}
-	// default is always last as the lowest common denominator
-	shims = append(shims, "default")
+
+	// Load up the best CPU variant if not primary requested
+	if gpuInfo.Library != "cpu" {
+		variant := gpu.GetCPUVariant()
+		// If no variant, fall back to the lowest common denominator "cpu" library
+		// If we have a variant, use it only on an exact match, since
+		// attempting to run the wrong CPU instructions will panic the
+		// process
+		if variant != "" {
+			for cmp := range availableShims {
+				if cmp == "cpu_"+variant {
+					shims = append(shims, availableShims[cmp])
+					break
+				}
+			}
+		} else {
+			shims = append(shims, availableShims["cpu"])
+		}
+	}
+
+	// Finally, if we didn't find any matches, LCD CPU FTW
+	if len(shims) == 0 {
+		shims = []string{availableShims["cpu"]}
+	}
 	return shims
 }
 
@@ -116,7 +132,8 @@ func nativeInit(workdir string) error {
 		variants[i] = variant
 		i++
 	}
-	log.Printf("Dynamic LLM variants %v", variants)
+	log.Printf("Dynamic LLM libraries %v", variants)
+	log.Printf("Override detection logic by setting OLLAMA_LLM_LIBRARY")
 
 	return nil
 }

+ 4 - 4
llm/shim_ext_server_linux.go

@@ -11,13 +11,13 @@ import (
 var libEmbed embed.FS
 
 func updatePath(dir string) {
-	pathComponents := strings.Split(os.Getenv("PATH"), ":")
+	pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
 	for _, comp := range pathComponents {
 		if comp == dir {
 			return
 		}
 	}
-	newPath := strings.Join(append(pathComponents, dir), ":")
-	log.Printf("Updating PATH to %s", newPath)
-	os.Setenv("PATH", newPath)
+	newPath := strings.Join(append([]string{dir}, pathComponents...), ":")
+	log.Printf("Updating LD_LIBRARY_PATH to %s", newPath)
+	os.Setenv("LD_LIBRARY_PATH", newPath)
 }
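
Besides switching from `PATH` to `LD_LIBRARY_PATH`, the directory is now prepended rather than appended, so the freshly extracted libraries take precedence over any same-named libraries elsewhere on the search path.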

+ 7 - 14
llm/shim_test.go

@@ -13,9 +13,8 @@ func TestGetShims(t *testing.T) {
 	}
 	assert.Equal(t, false, rocmShimPresent())
 	res := getShims(gpu.GpuInfo{Library: "cpu"})
-	assert.Len(t, res, 2)
+	assert.Len(t, res, 1)
 	assert.Equal(t, availableShims["cpu"], res[0])
-	assert.Equal(t, "default", res[1])
 
 	availableShims = map[string]string{
 		"rocm_v5": "X_rocm_v5",
@@ -24,28 +23,24 @@ func TestGetShims(t *testing.T) {
 	}
 	assert.Equal(t, true, rocmShimPresent())
 	res = getShims(gpu.GpuInfo{Library: "rocm"})
-	assert.Len(t, res, 4)
+	assert.Len(t, res, 3)
 	assert.Equal(t, availableShims["rocm_v5"], res[0])
 	assert.Equal(t, availableShims["rocm_v6"], res[1])
 	assert.Equal(t, availableShims["cpu"], res[2])
-	assert.Equal(t, "default", res[3])
 
 	res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
-	assert.Len(t, res, 4)
+	assert.Len(t, res, 3)
 	assert.Equal(t, availableShims["rocm_v6"], res[0])
 	assert.Equal(t, availableShims["rocm_v5"], res[1])
 	assert.Equal(t, availableShims["cpu"], res[2])
-	assert.Equal(t, "default", res[3])
 
 	res = getShims(gpu.GpuInfo{Library: "cuda"})
-	assert.Len(t, res, 2)
+	assert.Len(t, res, 1)
 	assert.Equal(t, availableShims["cpu"], res[0])
-	assert.Equal(t, "default", res[1])
 
 	res = getShims(gpu.GpuInfo{Library: "default"})
-	assert.Len(t, res, 2)
-	assert.Equal(t, availableShims["cpu"], res[0])
-	assert.Equal(t, "default", res[1])
+	assert.Len(t, res, 1)
+	assert.Equal(t, "default", res[0])
 
 	availableShims = map[string]string{
 		"rocm": "X_rocm_v5",
@@ -53,9 +48,7 @@ func TestGetShims(t *testing.T) {
 	}
 	assert.Equal(t, true, rocmShimPresent())
 	res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"})
-	assert.Len(t, res, 3)
+	assert.Len(t, res, 2)
 	assert.Equal(t, availableShims["rocm"], res[0])
 	assert.Equal(t, availableShims["cpu"], res[1])
-	assert.Equal(t, "default", res[2])
-
 }

+ 1 - 1
scripts/build_linux.sh

@@ -9,7 +9,7 @@ BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
 mkdir -p dist
 
 for TARGETARCH in ${BUILD_ARCH}; do
-    docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
+    docker build --platform=linux/$TARGETARCH --build-arg=GOFLAGS --build-arg=CGO_CFLAGS --build-arg=OLLAMA_CUSTOM_CPU_DEFS -f Dockerfile.build -t builder:$TARGETARCH .
     docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
     docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
     docker rm builder-$TARGETARCH