Browse Source

Add multiple CPU variants for Intel Mac

This also refines the build process for the ext_server build.
Daniel Hiltgen 1 year ago
parent
commit
1b249748ab

+ 3 - 0
.github/workflows/test.yaml

@@ -86,6 +86,9 @@ jobs:
           - os: windows-latest
           - os: windows-latest
             arch: arm64
             arch: arm64
     runs-on: ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
+    env:
+      GOARCH: ${{ matrix.arch }}
+      CGO_ENABLED: "1"
     steps:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/checkout@v4
         with:
         with:

+ 8 - 2
Dockerfile.build

@@ -10,6 +10,7 @@ COPY llm llm
 
 
 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 FROM --platform=linux/amd64 nvidia/cuda:$CUDA_VERSION-devel-centos7 AS cuda-build-amd64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
+ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@@ -19,6 +20,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 FROM --platform=linux/arm64 nvidia/cuda:$CUDA_VERSION-devel-rockylinux8 AS cuda-build-arm64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
+ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
 ENV PATH /opt/rh/gcc-toolset-10/root/usr/bin:$PATH
@@ -28,6 +30,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 
 FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
 FROM --platform=linux/amd64 rocm/dev-centos-7:5.7.1-complete AS rocm-5-build-amd64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
+ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@@ -38,6 +41,7 @@ RUN OLLAMA_SKIP_CPU_GENERATE=1 sh gen_linux.sh
 
 
 FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
 FROM --platform=linux/amd64 rocm/dev-centos-7:6.0-complete AS rocm-6-build-amd64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
+ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@@ -50,6 +54,7 @@ FROM --platform=linux/amd64 centos:7 AS cpu-build-amd64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 ARG GOLANG_VERSION
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG OLLAMA_CUSTOM_CPU_DEFS
+ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@@ -61,6 +66,7 @@ FROM --platform=linux/arm64 centos:7 AS cpu-build-arm64
 ARG CMAKE_VERSION
 ARG CMAKE_VERSION
 ARG GOLANG_VERSION
 ARG GOLANG_VERSION
 ARG OLLAMA_CUSTOM_CPU_DEFS
 ARG OLLAMA_CUSTOM_CPU_DEFS
+ARG CGO_CFLAGS
 COPY ./scripts/rh_linux_deps.sh /
 COPY ./scripts/rh_linux_deps.sh /
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /rh_linux_deps.sh
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
 ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
@@ -72,7 +78,7 @@ RUN sh gen_linux.sh
 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
 FROM --platform=linux/amd64 cpu-build-amd64 AS build-amd64
 ENV CGO_ENABLED 1
 ENV CGO_ENABLED 1
 ARG GOFLAGS
 ARG GOFLAGS
-ARG CGO_FLAGS
+ARG CGO_CFLAGS
 WORKDIR /go/src/github.com/jmorganca/ollama
 WORKDIR /go/src/github.com/jmorganca/ollama
 COPY . .
 COPY . .
 COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=cuda-build-amd64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
@@ -84,7 +90,7 @@ FROM --platform=linux/arm64 cpu-build-arm64 AS build-arm64
 ENV CGO_ENABLED 1
 ENV CGO_ENABLED 1
 ARG GOLANG_VERSION
 ARG GOLANG_VERSION
 ARG GOFLAGS
 ARG GOFLAGS
-ARG CGO_FLAGS
+ARG CGO_CFLAGS
 WORKDIR /go/src/github.com/jmorganca/ollama
 WORKDIR /go/src/github.com/jmorganca/ollama
 COPY . .
 COPY . .
 COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/
 COPY --from=cuda-build-arm64 /go/src/github.com/jmorganca/ollama/llm/llama.cpp/build/linux/ llm/llama.cpp/build/linux/

+ 3 - 3
llm/dyn_ext_server.c

@@ -5,7 +5,7 @@
 
 
 #ifdef __linux__
 #ifdef __linux__
 #include <dlfcn.h>
 #include <dlfcn.h>
-#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags | RTLD_DEEPBIND)
+#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
 #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
 #define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
 #define LOAD_ERR() strdup(dlerror())
 #define LOAD_ERR() strdup(dlerror())
 #define UNLOAD_LIBRARY(handle) dlclose(handle)
 #define UNLOAD_LIBRARY(handle) dlclose(handle)
@@ -58,8 +58,8 @@ void dyn_init(const char *libPath, struct dynamic_llama_server *s,
       {"", NULL},
       {"", NULL},
   };
   };
 
 
-  printf("loading %s library\n", libPath);
-  s->handle = LOAD_LIBRARY(libPath, RTLD_NOW);
+  printf("loading library %s\n", libPath);
+  s->handle = LOAD_LIBRARY(libPath, RTLD_GLOBAL|RTLD_NOW);
   if (!s->handle) {
   if (!s->handle) {
     err->id = -1;
     err->id = -1;
     char *msg = LOAD_ERR();
     char *msg = LOAD_ERR();

+ 1 - 10
llm/dyn_ext_server.go

@@ -372,15 +372,6 @@ func updatePath(dir string) {
 		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
 		newPath := strings.Join(append([]string{dir}, pathComponents...), ";")
 		log.Printf("Updating PATH to %s", newPath)
 		log.Printf("Updating PATH to %s", newPath)
 		os.Setenv("PATH", newPath)
 		os.Setenv("PATH", newPath)
-	} else {
-		pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":")
-		for _, comp := range pathComponents {
-			if comp == dir {
-				return
-			}
-		}
-		newPath := strings.Join(append([]string{dir}, pathComponents...), ":")
-		log.Printf("Updating LD_LIBRARY_PATH to %s", newPath)
-		os.Setenv("LD_LIBRARY_PATH", newPath)
 	}
 	}
+	// linux and darwin rely on rpath
 }
 }

+ 10 - 14
llm/ext_server/CMakeLists.txt

@@ -2,28 +2,24 @@
 
 
 set(TARGET ext_server)
 set(TARGET ext_server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp)
+if (WIN32)
+    add_library(${TARGET} SHARED ../../../ext_server/ext_server.cpp ../../llama.cpp)
+else()
+    add_library(${TARGET} STATIC ../../../ext_server/ext_server.cpp ../../llama.cpp)
+endif()
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_include_directories(${TARGET} PRIVATE ../..)
 target_include_directories(${TARGET} PRIVATE ../..)
 target_include_directories(${TARGET} PRIVATE ../../..)
 target_include_directories(${TARGET} PRIVATE ../../..)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
 target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
-target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT})
-target_compile_definitions(${TARGET} PRIVATE
-    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
-)
-
-if (BUILD_SHARED_LIBS)
-    set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_compile_definitions(ext_server PRIVATE LLAMA_SHARED LLAMA_BUILD)
-    add_library(ext_server_shared SHARED $<TARGET_OBJECTS:ext_server>)
-    target_link_libraries(ext_server_shared PRIVATE ggml llama llava common ${CMAKE_THREAD_LIBS_INIT})
-    install(TARGETS ext_server_shared LIBRARY)
-endif()
+target_link_libraries(${TARGET} PRIVATE ggml llava common )
+set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_compile_definitions(${TARGET} PRIVATE SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>)
+install(TARGETS ext_server LIBRARY)
 
 
 if (CUDAToolkit_FOUND)
 if (CUDAToolkit_FOUND)
     target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
     target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
     if (WIN32)
     if (WIN32)
-        target_link_libraries(ext_server_shared PRIVATE nvml)
+        target_link_libraries(${TARGET} PRIVATE nvml)
     endif()
     endif()
 endif()
 endif()

+ 53 - 20
llm/generate/gen_common.sh

@@ -1,15 +1,44 @@
 # common logic across linux and darwin
 # common logic across linux and darwin
 
 
 init_vars() {
 init_vars() {
+    case "${GOARCH}" in
+    "amd64")
+        ARCH="x86_64"
+        ;;
+    "arm64")
+        ARCH="arm64"
+        ;;
+    *)
+        ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
+    esac
+
     LLAMACPP_DIR=../llama.cpp
     LLAMACPP_DIR=../llama.cpp
     CMAKE_DEFS=""
     CMAKE_DEFS=""
-    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
+    CMAKE_TARGETS="--target ext_server"
     if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
     if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
-        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on"
+        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
     else
     else
         # TODO - add additional optimization flags...
         # TODO - add additional optimization flags...
-        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off"
+        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
     fi
     fi
+    case $(uname -s) in 
+    "Darwin")
+        LIB_EXT="dylib"
+        WHOLE_ARCHIVE="-Wl,-force_load"
+        NO_WHOLE_ARCHIVE=""
+        GCC_ARCH="-arch ${ARCH}"
+        ;;
+    "Linux")
+        LIB_EXT="so"
+        WHOLE_ARCHIVE="-Wl,--whole-archive"
+        NO_WHOLE_ARCHIVE="-Wl,--no-whole-archive"
+
+        # Cross compiling not supported on linux - Use docker
+        GCC_ARCH=""
+        ;;
+    *)
+        ;;
+    esac
 }
 }
 
 
 git_module_setup() {
 git_module_setup() {
@@ -40,25 +69,29 @@ apply_patches() {
 build() {
 build() {
     cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
     cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
     cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
     cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
+    mkdir -p ${BUILD_DIR}/lib/
+    g++ -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.${LIB_EXT} \
+        ${GCC_ARCH} \
+        ${WHOLE_ARCHIVE} ${BUILD_DIR}/examples/server/libext_server.a ${NO_WHOLE_ARCHIVE} \
+        ${BUILD_DIR}/common/libcommon.a \
+        ${BUILD_DIR}/libllama.a \
+        -Wl,-rpath,\$ORIGIN \
+        -lpthread -ldl -lm \
+        ${EXTRA_LIBS}
 }
 }
 
 
-install() {
-    rm -rf ${BUILD_DIR}/lib
-    mkdir -p ${BUILD_DIR}/lib
-    cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib
-    cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib
-    cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib
-    cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib
-}
-
-link_server_lib() {
-    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
-        -Wl,--whole-archive \
-        ${BUILD_DIR}/lib/libext_server.a \
-        -Wl,--no-whole-archive \
-        ${BUILD_DIR}/lib/libcommon.a \
-        ${BUILD_DIR}/lib/libllama.a \
-        -lstdc++
+compress_libs() {
+    echo "Compressing payloads to reduce overall binary size..."
+    pids=""
+    for lib in ${BUILD_DIR}/lib/*.${LIB_EXT}* ; do
+        bzip2 -v9 ${lib} &
+        pids+=" $!"
+    done
+    echo 
+    for pid in ${pids}; do
+        wait $pid
+    done
+    echo "Finished compression"
 }
 }
 
 
 # Keep the local tree clean after we're done with the build
 # Keep the local tree clean after we're done with the build

+ 42 - 23
llm/generate/gen_darwin.sh

@@ -9,16 +9,52 @@ set -o pipefail
 echo "Starting darwin generate script"
 echo "Starting darwin generate script"
 source $(dirname $0)/gen_common.sh
 source $(dirname $0)/gen_common.sh
 init_vars
 init_vars
-CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
-BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
+git_module_setup
+apply_patches
+
+COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_ACCELERATE=off"
+
 case "${GOARCH}" in
 case "${GOARCH}" in
 "amd64")
 "amd64")
-    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
-    ARCH="x86_64"
+    COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=off -DLLAMA_NATIVE=off"
+
+    #
+    # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
+    #
+    CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu"
+    echo "Building LCD CPU"
+    build
+    compress_libs
+
+    #
+    # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
+    # Approximately 400% faster than LCD on same CPU
+    #
+    init_vars
+    CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx"
+    echo "Building AVX CPU"
+    build
+    compress_libs
+
+    #
+    # ~2013 CPU Dynamic library
+    # Approximately 10% faster than AVX on same CPU
+    #
+    init_vars
+    CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/cpu_avx2"
+    echo "Building AVX2 CPU"
+    build
+    compress_libs
     ;;
     ;;
 "arm64")
 "arm64")
-    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}"
-    ARCH="arm64"
+    CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/darwin/${ARCH}/metal"
+    EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
+    build
+    compress_libs
     ;;
     ;;
 *)
 *)
     echo "GOARCH must be set"
     echo "GOARCH must be set"
@@ -27,21 +63,4 @@ case "${GOARCH}" in
     ;;
     ;;
 esac
 esac
 
 
-git_module_setup
-apply_patches
-build
-install
-gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
-    -arch ${ARCH} \
-    -Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
-    ${BUILD_DIR}/lib/libcommon.a \
-    ${BUILD_DIR}/lib/libllama.a \
-    ${BUILD_DIR}/lib/libggml_static.a \
-    -lpthread -ldl -lm -lc++ \
-    -framework Accelerate \
-    -framework Foundation \
-    -framework Metal \
-    -framework MetalKit \
-    -framework MetalPerformanceShaders
-
 cleanup
 cleanup

+ 40 - 47
llm/generate/gen_linux.sh

@@ -2,16 +2,14 @@
 # This script is intended to run inside the go generate
 # This script is intended to run inside the go generate
 # working directory must be llm/generate/
 # working directory must be llm/generate/
 
 
-# First we build our default built-in library which will be linked into the CGO
-# binary as a normal dependency. This default build is CPU based.
+# First we build one or more CPU based LLM libraries
 #
 #
-# Then we build a CUDA dynamic library (although statically linked with the CUDA
-# library dependencies for maximum portability)
+# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
+# library dependencies
 #
 #
-# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  ROCm is particularly
-# important to be a dynamic lib even if it's the only GPU library detected because
-# we can't redistribute the objectfiles but must rely on dynamic libraries at
-# runtime, which could lead the server not to start if not present.
+# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  The ROCM
+# libraries are quite large, and also dynamically load data files at runtime
+# which in turn are large, so we don't attempt to carry them as payload
 
 
 set -ex
 set -ex
 set -o pipefail
 set -o pipefail
@@ -59,11 +57,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
     if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
     if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
         echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
         echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
         CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
         CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
         echo "Building custom CPU"
         echo "Building custom CPU"
         build
         build
-        install
-        link_server_lib
+        compress_libs
     else
     else
         # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
         # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
         # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
         # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
@@ -80,11 +77,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
         # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
         #
         #
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
         echo "Building LCD CPU"
         echo "Building LCD CPU"
         build
         build
-        install
-        link_server_lib
+        compress_libs
 
 
         #
         #
         # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
         # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
@@ -92,11 +88,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         #
         #
         init_vars
         init_vars
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
         echo "Building AVX CPU"
         echo "Building AVX CPU"
         build
         build
-        install
-        link_server_lib
+        compress_libs
 
 
         #
         #
         # ~2013 CPU Dynamic library
         # ~2013 CPU Dynamic library
@@ -104,11 +99,10 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
         #
         #
         init_vars
         init_vars
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
         CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
-        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2"
+        BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
         echo "Building AVX2 CPU"
         echo "Building AVX2 CPU"
         build
         build
-        install
-        link_server_lib
+        compress_libs
     fi
     fi
 else
 else
     echo "Skipping CPU generation step as requested"
     echo "Skipping CPU generation step as requested"
@@ -127,22 +121,27 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
         CUDA_VARIANT=_v${CUDA_MAJOR}
         CUDA_VARIANT=_v${CUDA_MAJOR}
     fi
     fi
     CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
     CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cuda${CUDA_VARIANT}"
+    EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
     build
     build
-    install
-    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
-        -Wl,--whole-archive \
-        ${BUILD_DIR}/lib/libext_server.a \
-        ${BUILD_DIR}/lib/libcommon.a \
-        ${BUILD_DIR}/lib/libllama.a \
-        -Wl,--no-whole-archive \
-        ${CUDA_LIB_DIR}/libcudart_static.a \
-        ${CUDA_LIB_DIR}/libcublas_static.a \
-        ${CUDA_LIB_DIR}/libcublasLt_static.a \
-        ${CUDA_LIB_DIR}/libcudadevrt.a \
-        ${CUDA_LIB_DIR}/libculibos.a \
-        -lcuda \
-        -lrt -lpthread -ldl -lstdc++ -lm
+
+    # Carry the CUDA libs as payloads to help reduce dependency burden on users
+    #
+    # TODO - in the future we may shift to packaging these separately and conditionally
+    #        downloading them in the install script.
+    DEPS="$(ldd ${BUILD_DIR}/lib/libext_server.so )"
+    for lib in libcudart.so libcublas.so libcublasLt.so ; do
+        DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
+        if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
+            cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/lib/"
+        elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
+            cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/lib/"
+        else
+            cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/lib/"
+        fi
+    done
+    compress_libs
+
 fi
 fi
 
 
 if [ -z "${ROCM_PATH}" ]; then
 if [ -z "${ROCM_PATH}" ]; then
@@ -164,19 +163,13 @@ if [ -d "${ROCM_PATH}" ]; then
     fi
     fi
     init_vars
     init_vars
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
     CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
-    BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
+    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
     build
     build
-    install
-    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
-        -Wl,--whole-archive \
-        ${BUILD_DIR}/lib/libext_server.a \
-        ${BUILD_DIR}/lib/libcommon.a \
-        ${BUILD_DIR}/lib/libllama.a \
-        -Wl,--no-whole-archive \
-        -lrt -lpthread -ldl -lstdc++ -lm \
-        -L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
-        -Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
-        -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
+
+    # Note: the ROCM libs and runtime library files are too large to embed, so we depend on
+    #       them being present at runtime on the host
+    compress_libs
 fi
 fi
 
 
 cleanup
 cleanup

+ 55 - 15
llm/generate/gen_windows.ps1

@@ -5,7 +5,8 @@ $ErrorActionPreference = "Stop"
 function init_vars {
 function init_vars {
     $script:llamacppDir = "../llama.cpp"
     $script:llamacppDir = "../llama.cpp"
     $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off",  "-A","x64")
     $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off",  "-A","x64")
-    $script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
+    $script:cmakeTargets = @("ext_server")
+    $script:ARCH = "amd64" # arm not yet supported.
     if ($env:CGO_CFLAGS -contains "-g") {
     if ($env:CGO_CFLAGS -contains "-g") {
         $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
         $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
         $script:config = "RelWithDebInfo"
         $script:config = "RelWithDebInfo"
@@ -13,6 +14,17 @@ function init_vars {
         $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
         $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
         $script:config = "Release"
         $script:config = "Release"
     }
     }
+    # Try to find the CUDA dir
+    if ($env:CUDA_LIB_DIR -eq $null) {
+        $d=(get-command -ea 'silentlycontinue' nvcc).path
+        if ($d -ne $null) {
+            $script:CUDA_LIB_DIR=($d| split-path -parent)
+        }
+    } else {
+        $script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
+    }
+    $script:BZIP2=(get-command -ea 'silentlycontinue' bzip2).path
+    $script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
 }
 }
 
 
 function git_module_setup {
 function git_module_setup {
@@ -47,11 +59,25 @@ function build {
 function install {
 function install {
     rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
     rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
     md "${script:buildDir}/lib" -ea 0 > $null
     md "${script:buildDir}/lib" -ea 0 > $null
-    cp "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" "${script:buildDir}/lib"
+    cp "${script:buildDir}/bin/${script:config}/ext_server.dll" "${script:buildDir}/lib"
     cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
     cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
 
 
     # Display the dll dependencies in the build log
     # Display the dll dependencies in the build log
-    dumpbin /dependents "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" | select-string ".dll"
+    if ($script:DUMPBIN -ne $null) {
+        & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/${script:config}/ext_server.dll" | select-string ".dll"
+    }
+}
+
+function compress_libs {
+    if ($script:BZIP2 -eq $null) {
+        write-host "bzip2 not installed, not compressing files"
+        return
+    }
+    write-host "Compressing dlls..."
+    $libs = dir "${script:buildDir}/lib/*.dll"
+    foreach ($file in $libs) {
+        & "$script:BZIP2" -v9 $file
+    }
 }
 }
 
 
 function cleanup {
 function cleanup {
@@ -71,33 +97,47 @@ apply_patches
 $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
 $script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off")
 
 
 $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-$script:buildDir="${script:llamacppDir}/build/windows/cpu"
+$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu"
 write-host "Building LCD CPU"
 write-host "Building LCD CPU"
 build
 build
 install
 install
+compress_libs
 
 
 $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
 $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs
-$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx"
+$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx"
 write-host "Building AVX CPU"
 write-host "Building AVX CPU"
 build
 build
 install
 install
+compress_libs
 
 
 $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
 $script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs
-$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx2"
+$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cpu_avx2"
 write-host "Building AVX2 CPU"
 write-host "Building AVX2 CPU"
 build
 build
 install
 install
+compress_libs
 
 
-# Then build cuda as a dynamically loaded library
-# TODO figure out how to detect cuda version
-init_vars
-$script:buildDir="${script:llamacppDir}/build/windows/cuda"
-$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
-build
-install
-
+if ($null -ne $script:CUDA_LIB_DIR) {
+    # Then build cuda as a dynamically loaded library
+    $nvcc = (get-command -ea 'silentlycontinue' nvcc)
+    if ($null -ne $nvcc) {
+        $script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
+    }
+    if ($null -ne $script:CUDA_VERSION) {
+        $script:CUDA_VARIANT="_"+$script:CUDA_VERSION
+    }
+    init_vars
+    $script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
+    $script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on")
+    build
+    install
+    cp "${script:CUDA_LIB_DIR}/cudart64_*.dll" "${script:buildDir}/lib"
+    cp "${script:CUDA_LIB_DIR}/cublas64_*.dll" "${script:buildDir}/lib"
+    cp "${script:CUDA_LIB_DIR}/cublasLt64_*.dll" "${script:buildDir}/lib"
+    compress_libs
+}
 # TODO - actually implement ROCm support on windows
 # TODO - actually implement ROCm support on windows
-$script:buildDir="${script:llamacppDir}/build/windows/rocm"
+$script:buildDir="${script:llamacppDir}/build/windows/${script:ARCH}/rocm"
 
 
 rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
 rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
 md "${script:buildDir}/lib" -ea 0 > $null
 md "${script:buildDir}/lib" -ea 0 > $null

+ 59 - 33
llm/payload_common.go

@@ -1,9 +1,9 @@
 package llm
 package llm
 
 
 import (
 import (
+	"compress/bzip2"
 	"errors"
 	"errors"
 	"fmt"
 	"fmt"
-	"golang.org/x/exp/slices"
 	"io"
 	"io"
 	"io/fs"
 	"io/fs"
 	"log"
 	"log"
@@ -12,6 +12,9 @@ import (
 	"runtime"
 	"runtime"
 	"strings"
 	"strings"
 
 
+	"golang.org/x/exp/slices"
+	"golang.org/x/sync/errgroup"
+
 	"github.com/jmorganca/ollama/gpu"
 	"github.com/jmorganca/ollama/gpu"
 )
 )
 
 
@@ -20,7 +23,7 @@ import (
 // Any library without a variant is the lowest common denominator
 // Any library without a variant is the lowest common denominator
 var availableDynLibs = map[string]string{}
 var availableDynLibs = map[string]string{}
 
 
-const pathComponentCount = 6
+const pathComponentCount = 7
 
 
 // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
 // getDynLibs returns an ordered list of LLM libraries to try, starting with the best
 func getDynLibs(gpuInfo gpu.GpuInfo) []string {
 func getDynLibs(gpuInfo gpu.GpuInfo) []string {
@@ -100,6 +103,7 @@ func rocmDynLibPresent() bool {
 }
 }
 
 
 func nativeInit(workdir string) error {
 func nativeInit(workdir string) error {
+	log.Printf("Extracting dynamic libraries...")
 	if runtime.GOOS == "darwin" {
 	if runtime.GOOS == "darwin" {
 		err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
 		err := extractPayloadFiles(workdir, "llama.cpp/ggml-metal.metal")
 		if err != nil {
 		if err != nil {
@@ -113,7 +117,7 @@ func nativeInit(workdir string) error {
 		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
 		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
 	}
 	}
 
 
-	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
+	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/*/lib/*")
 	if err != nil {
 	if err != nil {
 		if err == payloadMissing {
 		if err == payloadMissing {
 			log.Printf("%s", payloadMissing)
 			log.Printf("%s", payloadMissing)
@@ -151,45 +155,61 @@ func extractDynamicLibs(workDir, glob string) ([]string, error) {
 	}
 	}
 	libs := []string{}
 	libs := []string{}
 
 
+	// TODO consider making this idempotent with some sort of persistent directory (where we store models probably)
+	// and tracking by version so we don't reexpand the files every time
+	// Also maybe consider lazy loading only what is needed
+
+	g := new(errgroup.Group)
 	for _, file := range files {
 	for _, file := range files {
 		pathComps := strings.Split(file, "/")
 		pathComps := strings.Split(file, "/")
 		if len(pathComps) != pathComponentCount {
 		if len(pathComps) != pathComponentCount {
 			log.Printf("unexpected payload components: %v", pathComps)
 			log.Printf("unexpected payload components: %v", pathComps)
 			continue
 			continue
 		}
 		}
-		// llama.cpp/build/$OS/$VARIANT/lib/$LIBRARY
-		// Include the variant in the path to avoid conflicts between multiple server libs
-		targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
-		srcFile, err := libEmbed.Open(file)
-		if err != nil {
-			return nil, fmt.Errorf("read payload %s: %v", file, err)
-		}
-		defer srcFile.Close()
-		if err := os.MkdirAll(targetDir, 0o755); err != nil {
-			return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)
-		}
-
-		destFile := filepath.Join(targetDir, filepath.Base(file))
-		if strings.Contains(destFile, "server") {
-			libs = append(libs, destFile)
-		}
 
 
-		_, err = os.Stat(destFile)
-		switch {
-		case errors.Is(err, os.ErrNotExist):
-			destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+		file := file
+		g.Go(func() error {
+			// llama.cpp/build/$OS/$GOARCH/$VARIANT/lib/$LIBRARY
+			// Include the variant in the path to avoid conflicts between multiple server libs
+			targetDir := filepath.Join(workDir, pathComps[pathComponentCount-3])
+			srcFile, err := libEmbed.Open(file)
 			if err != nil {
 			if err != nil {
-				return nil, fmt.Errorf("write payload %s: %v", file, err)
+				return fmt.Errorf("read payload %s: %v", file, err)
 			}
 			}
-			defer destFile.Close()
-			if _, err := io.Copy(destFile, srcFile); err != nil {
-				return nil, fmt.Errorf("copy payload %s: %v", file, err)
+			defer srcFile.Close()
+			if err := os.MkdirAll(targetDir, 0o755); err != nil {
+				return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
 			}
 			}
-		case err != nil:
-			return nil, fmt.Errorf("stat payload %s: %v", file, err)
-		}
+			src := io.Reader(srcFile)
+			filename := file
+			if strings.HasSuffix(file, ".bz2") {
+				src = bzip2.NewReader(src)
+				filename = strings.TrimSuffix(filename, ".bz2")
+			}
+
+			destFile := filepath.Join(targetDir, filepath.Base(filename))
+			if strings.Contains(destFile, "server") {
+				libs = append(libs, destFile)
+			}
+
+			_, err = os.Stat(destFile)
+			switch {
+			case errors.Is(err, os.ErrNotExist):
+				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+				if err != nil {
+					return fmt.Errorf("write payload %s: %v", file, err)
+				}
+				defer destFile.Close()
+				if _, err := io.Copy(destFile, src); err != nil {
+					return fmt.Errorf("copy payload %s: %v", file, err)
+				}
+			case err != nil:
+				return fmt.Errorf("stat payload %s: %v", file, err)
+			}
+			return nil
+		})
 	}
 	}
-	return libs, nil
+	return libs, g.Wait()
 }
 }
 
 
 func extractPayloadFiles(workDir, glob string) error {
 func extractPayloadFiles(workDir, glob string) error {
@@ -207,8 +227,14 @@ func extractPayloadFiles(workDir, glob string) error {
 		if err := os.MkdirAll(workDir, 0o755); err != nil {
 		if err := os.MkdirAll(workDir, 0o755); err != nil {
 			return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
 			return fmt.Errorf("create payload temp dir %s: %v", workDir, err)
 		}
 		}
+		src := io.Reader(srcFile)
+		filename := file
+		if strings.HasSuffix(file, ".bz2") {
+			src = bzip2.NewReader(src)
+			filename = strings.TrimSuffix(filename, ".bz2")
+		}
 
 
-		destFile := filepath.Join(workDir, filepath.Base(file))
+		destFile := filepath.Join(workDir, filepath.Base(filename))
 		_, err = os.Stat(destFile)
 		_, err = os.Stat(destFile)
 		switch {
 		switch {
 		case errors.Is(err, os.ErrNotExist):
 		case errors.Is(err, os.ErrNotExist):
@@ -217,7 +243,7 @@ func extractPayloadFiles(workDir, glob string) error {
 				return fmt.Errorf("write payload %s: %v", file, err)
 				return fmt.Errorf("write payload %s: %v", file, err)
 			}
 			}
 			defer destFile.Close()
 			defer destFile.Close()
-			if _, err := io.Copy(destFile, srcFile); err != nil {
+			if _, err := io.Copy(destFile, src); err != nil {
 				return fmt.Errorf("copy payload %s: %v", file, err)
 				return fmt.Errorf("copy payload %s: %v", file, err)
 			}
 			}
 		case err != nil:
 		case err != nil:

+ 0 - 8
llm/payload_darwin.go

@@ -1,8 +0,0 @@
-package llm
-
-import (
-	"embed"
-)
-
-//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/*/lib/*.so
-var libEmbed embed.FS

+ 8 - 0
llm/payload_darwin_amd64.go

@@ -0,0 +1,8 @@
+package llm
+
+import (
+	"embed"
+)
+
+//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/x86_64/*/lib/*.dylib*
+var libEmbed embed.FS

+ 8 - 0
llm/payload_darwin_arm64.go

@@ -0,0 +1,8 @@
+package llm
+
+import (
+	"embed"
+)
+
+//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/arm64/*/lib/*.dylib*
+var libEmbed embed.FS

+ 1 - 1
llm/payload_linux.go

@@ -4,5 +4,5 @@ import (
 	"embed"
 	"embed"
 )
 )
 
 
-//go:embed llama.cpp/build/linux/*/lib/*.so
+//go:embed llama.cpp/build/linux/*/*/lib/*.so*
 var libEmbed embed.FS
 var libEmbed embed.FS

+ 1 - 1
llm/payload_windows.go

@@ -4,5 +4,5 @@ import (
 	"embed"
 	"embed"
 )
 )
 
 
-//go:embed llama.cpp/build/windows/*/lib/*.dll
+//go:embed llama.cpp/build/windows/*/*/lib/*.dll*
 var libEmbed embed.FS
 var libEmbed embed.FS

+ 23 - 8
scripts/build_darwin.sh

@@ -1,6 +1,6 @@
 #!/bin/sh
 #!/bin/sh
 
 
-set -eu
+set -e
 
 
 export VERSION=${VERSION:-0.0.0}
 export VERSION=${VERSION:-0.0.0}
 export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
 export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
@@ -11,21 +11,36 @@ for TARGETARCH in arm64 amd64; do
     rm -rf llm/llama.cpp/build
     rm -rf llm/llama.cpp/build
     GOOS=darwin GOARCH=$TARGETARCH go generate ./...
     GOOS=darwin GOARCH=$TARGETARCH go generate ./...
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
     CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
+    CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -cover -o dist/ollama-darwin-$TARGETARCH-cov
 done
 done
 
 
-lipo -create -output dist/ollama dist/ollama-darwin-*
-rm -f dist/ollama-darwin-*
-codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
+lipo -create -output dist/ollama dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
+rm -f dist/ollama-darwin-arm64 dist/ollama-darwin-amd64
+if [ -n "$APPLE_IDENTITY" ]; then
+    codesign --deep --force --options=runtime --sign "$APPLE_IDENTITY" --timestamp dist/ollama
+else
+    echo "Skipping code signing - set APPLE_IDENTITY"
+fi
 chmod +x dist/ollama
 chmod +x dist/ollama
 
 
-# build and sign the mac app
+# build and optionally sign the mac app
 npm install --prefix app
 npm install --prefix app
-npm run --prefix app make:sign
+if [ -n "$APPLE_IDENTITY" ]; then
+    npm run --prefix app make:sign
+else 
+    npm run --prefix app make
+fi
 cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
 cp app/out/make/zip/darwin/universal/Ollama-darwin-universal-$VERSION.zip dist/Ollama-darwin.zip
 
 
 # sign the binary and rename it
 # sign the binary and rename it
-codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
+if [ -n "$APPLE_IDENTITY" ]; then
+    codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/ollama
+else
+    echo "WARNING: Skipping code signing - set APPLE_IDENTITY"
+fi
 ditto -c -k --keepParent dist/ollama dist/temp.zip
 ditto -c -k --keepParent dist/ollama dist/temp.zip
-xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
+if [ -n "$APPLE_IDENTITY" ]; then
+    xcrun notarytool submit dist/temp.zip --wait --timeout 10m --apple-id $APPLE_ID --password $APPLE_PASSWORD --team-id $APPLE_TEAM_ID
+fi
 mv dist/ollama dist/ollama-darwin
 mv dist/ollama dist/ollama-darwin
 rm -f dist/temp.zip
 rm -f dist/temp.zip

+ 4 - 0
scripts/build_remote.py

@@ -66,3 +66,7 @@ subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...
 print("Building")
 print("Building")
 subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])
 subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])
 
 
+print("Copying built result")
+subprocess.check_call(['scp', netloc +":"+ path + "/ollama.exe",  './dist/'])
+
+

+ 1 - 0
scripts/rh_linux_deps.sh

@@ -28,6 +28,7 @@ fi
 
 
 if [ -n "${CMAKE_VERSION}" ]; then
 if [ -n "${CMAKE_VERSION}" ]; then
     curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
     curl -s -L https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-$(uname -m).tar.gz | tar -zx -C /usr --strip-components 1
+    dnf install -y bzip2
 fi
 fi
 
 
 if [ -n "${GOLANG_VERSION}" ]; then
 if [ -n "${GOLANG_VERSION}" ]; then