gen_linux.sh

#!/bin/bash
# This script is intended to be run from go generate;
# the working directory must be llm/generate/
# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCm
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload
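#
# Typical invocations (illustrative only -- the variables below are the ones this
# script checks; the exact command depends on how go generate is wired up):
#   cd llm/generate
#   go generate ./...                                              # build every detected variant
#   OLLAMA_SKIP_CUDA_GENERATE=1 OLLAMA_SKIP_ROCM_GENERATE=1 \
#       OLLAMA_CPU_TARGET=cpu_avx2 go generate ./...               # a single CPU runner only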
set -ex
set -o pipefail
compress_pids=""
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    (
        IFS=$';'
        echo "'${GPU_LIST[*]}'"
    )
}
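# Note: amdGPUs prints the target list as a single ';'-separated string wrapped in
# single quotes for cmake, e.g. with the default list above (middle entries elided):
#   'gfx900;gfx906:xnack-;...;gfx1102'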
echo "Starting linux generate script"
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Fall back to nvcc on the PATH, if present
        export CUDACXX=$(command -v nvcc)
    fi
fi
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh
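# Note: the helper functions used throughout this script (init_vars, git_module_setup,
# apply_patches, build, install, compress, wait_for_compress, cleanup) are expected to
# be provided by gen_common.sh, sourced above.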
init_vars
git_module_setup
apply_patches

init_vars
if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}_static"
    echo "Building static library"
    build
fi
init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="../build/linux/${ARCH}/cpu"
        echo "Building custom CPU"
        build
        install
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
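        # To check which of these extensions the build host itself supports (purely
        # illustrative; this script does not gate builds on the host CPU):
        #   grep -o -w -e avx -e avx2 -e avx512f -e fma -e f16c /proc/cpuinfo | sort -u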
        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
        if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            BUILD_DIR="../build/linux/${ARCH}/cpu"
            echo "Building LCD CPU"
            build
            install
            compress
        fi

        if [ "${ARCH}" == "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
                echo "Building AVX CPU"
                build
                install
                compress
            fi
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
                echo "Building AVX2 CPU"
                build
                install
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi
# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi
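# On distro-packaged CUDA installs the libraries may live elsewhere; both CUDA_LIB_DIR
# and CUDART_LIB_DIR can be set in the environment instead, e.g. (illustrative path):
#   CUDA_LIB_DIR=/usr/lib/x86_64-linux-gnu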
if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
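    # Example of the version probe above (hypothetical file name): for
    # /usr/local/cuda/lib64/libcudart.so.12.4.127, the third '.'-separated field is
    # "12", so CUDA_VARIANT becomes "_v12" and the runner is placed under cuda_v12.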
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"
        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
    # -t8: allow nvcc to run up to 8 compilation threads in parallel
    export CUDAFLAGS="-t8"
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
    export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
    build
    install
    echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
    mkdir -p "${CUDA_DIST_DIR}"
    for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so* ; do
        cp -a "${lib}" "${CUDA_DIST_DIR}"
    done
    compress
fi
if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    BUILD_DIR="../build/linux/${ARCH}/oneapi"
    ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
    export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    mkdir -p "${ONEAPI_DIST_DIR}"
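    # The loop below parses ldd output lines of the (illustrative) form
    #   libmkl_core.so.2 => /opt/intel/oneapi/mkl/latest/lib/libmkl_core.so.2 (0x00007f...)
    # and copies the resolved sycl/mkl/tbb libraries alongside the runner.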
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp -a "${dep}" "${ONEAPI_DIST_DIR}"
    done
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
    install
    compress
fi
if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
    fi
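    # Example of the variant probe above (hypothetical file name): /opt/rocm/lib/librocblas.so.4.1.60102
    # matches the glob; its fifth '.'-separated field is "60102", so ROCM_VARIANT becomes "_v60102".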
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCm GPU"
    fi
    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
    # ROCm dependencies are too large to fit into a unified bundle
    ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
    # TODO figure out how to disable runpath (rpath)
    # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
    export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build

    # copy the ROCm dependencies
    mkdir -p "${ROCM_DIST_DIR}"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo ); do
        cp -a "${dep}"* "${ROCM_DIST_DIR}"
    done
    install
    compress
fi
cleanup
wait_for_compress
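# The listing below shows the runner variants that were actually built in the build tree,
# e.g. (illustrative, depends on detected toolkits): cpu cpu_avx cpu_avx2 cuda_v12 rocm_v60102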
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"