#!/bin/bash
# This script is intended to run inside go generate;
# the working directory must be llm/generate/
#
# First we build one or more CPU-based LLM libraries.
#
# Then, if we detect CUDA, we build a CUDA dynamic library and carry the required
# library dependencies alongside it.
#
# Then, if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCm
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload.
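#
# Example (assumes the usual layout, where a go:generate directive in the
# llm/generate package invokes this script):
#   cd llm/generate && go generate ./...
# or, for debugging, run it directly from that directory:
#   cd llm/generate && bash ./gen_linux.sh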

set -ex
set -o pipefail
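# compress_pids collects the PIDs of background compression jobs started by the
# compress helper; wait_for_compress joins them at the end of the script.
# (This describes how the gen_common.sh helpers appear to be used below.)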
compress_pids=""

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    (
        IFS=$';'
        echo "'${GPU_LIST[*]}'"
    )
}
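# amdGPUs prints the target list joined with ';' and wrapped in single quotes,
# e.g. 'gfx900;gfx906:xnack-;...;gfx1102' for the default GPU_LIST above.
# Setting AMDGPU_TARGETS overrides the list entirely.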

echo "Starting linux generate script"
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Otherwise fall back to whatever nvcc is on the PATH, if any
        export CUDACXX=$(command -v nvcc)
    fi
fi
COMMON_CMAKE_DEFS="-DCMAKE_SKIP_RPATH=on -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh
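# The build/packaging helpers called below (init_vars, git_module_setup,
# apply_patches, build, install, dist, compress, cleanup, wait_for_compress)
# are not defined in this file; they are expected to come from gen_common.sh.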
init_vars
git_module_setup
apply_patches

init_vars
if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
    # Static build for linking into the Go binary
    init_vars
    CMAKE_TARGETS="--target llama --target ggml"
    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}_static"
    echo "Building static library"
    build
fi
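
# Examples of the skip/override knobs above (hypothetical invocations):
#   OLLAMA_SKIP_STATIC_GENERATE=1 bash ./gen_linux.sh   # skip the static build
#   OLLAMA_CPU_TARGET=static bash ./gen_linux.sh         # force the static build and skip
#                                                        # the dynamic CPU variants below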

init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake for configuring
    # llama.cpp, and we'll build only 1 CPU variant in that case as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        RUNNER="cpu"
        BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
        echo "Building custom CPU"
        build
        install
        dist
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
        # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
        COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
        if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as the lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
            RUNNER=cpu
            BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
            echo "Building LCD CPU"
            build
            install
            dist
            compress
        fi

        if [ "${ARCH}" == "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than LCD on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
                RUNNER=cpu_avx
                BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
                echo "Building AVX CPU"
                build
                install
                dist
                compress
            fi

            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU Dynamic library
                # Approximately 10% faster than AVX on same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
                RUNNER=cpu_avx2
                BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
                echo "Building AVX2 CPU"
                build
                install
                dist
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi
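
# To see which of these instruction set extensions the build host itself supports
# (useful when picking OLLAMA_CPU_TARGET), check the CPU flags, e.g.:
#   grep -o -w -e avx -e avx2 -e avx512f -e f16c -e fma /proc/cpuinfo | sort -u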

# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi

if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" -a -z "${CUDA_VARIANT}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
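    # e.g. an (illustrative) libcudart.so.12.4.99 yields CUDA_MAJOR=12, so the
    # runner below becomes cuda_v12; set CUDA_VARIANT explicitly to override.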
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"
        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
    fi

    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
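    # -t8 lets nvcc compile for multiple GPU architectures with up to 8 parallel
    # threads (nvcc's --threads option); it only affects build time, not the output.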
    export CUDAFLAGS="-t8"
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS} -DGGML_STATIC=off"
    RUNNER=cuda${CUDA_VARIANT}
    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
    export LLAMA_SERVER_LDFLAGS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    CUDA_DIST_DIR="${CUDA_DIST_DIR:-${DIST_BASE}/lib/ollama}"
    build
    install
    dist
    echo "Installing CUDA dependencies in ${CUDA_DIST_DIR}"
    mkdir -p "${CUDA_DIST_DIR}"
    for lib in ${CUDA_LIB_DIR}/libcudart.so* ${CUDA_LIB_DIR}/libcublas.so* ${CUDA_LIB_DIR}/libcublasLt.so*; do
        cp -a "${lib}" "${CUDA_DIST_DIR}"
    done
    compress
fi

if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
    RUNNER=oneapi
    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
    ONEAPI_DIST_DIR="${DIST_BASE}/lib/ollama"
    export LLAMA_SERVER_LDFLAGS="-fsycl -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    mkdir -p "${ONEAPI_DIST_DIR}"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp -a "${dep}" "${ONEAPI_DIST_DIR}"
    done
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${ONEAPI_DIST_DIR}"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${ONEAPI_DIST_DIR}"
    install
    dist
    compress
fi

if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
    fi
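    # Example: a (hypothetical) librocblas.so.4.1.60103 splits on '.' into
    # librocblas / so / 4 / 1 / 60103, giving ROCM_VARIANT=_v60103 and a
    # runner named rocm_v60103 below.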
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DGGML_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCM GPU"
    fi
    RUNNER=rocm${ROCM_VARIANT}
    BUILD_DIR="../build/linux/${ARCH}/${RUNNER}"
    # ROCm dependencies are too large to fit into a unified bundle
    ROCM_DIST_DIR="${DIST_BASE}/../linux-${GOARCH}-rocm/lib/ollama"
    # TODO figure out how to disable runpath (rpath)
    # export CMAKE_HIP_FLAGS="-fno-rtlib-add-rpath" # doesn't work
    export LLAMA_SERVER_LDFLAGS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
    build

    # copy the ROCm dependencies
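    # The loop below walks ldd output for the built server binary, keeps the
    # ROCm-related shared libraries it links against (plus libtinfo, libnuma and
    # libelf), and copies both the symlink and the real file it resolves to.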
    mkdir -p "${ROCM_DIST_DIR}"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -v "${ARCH}/rocm${ROCM_VARIANT}" | grep -e rocm -e amdgpu -e libtinfo -e libnuma -e libelf); do
        cp -a "${dep}"* "${ROCM_DIST_DIR}"
        if [ $(readlink -f "${dep}") != "${dep}" ]; then
            cp $(readlink -f "${dep}") "${ROCM_DIST_DIR}"
        fi
    done
    install
    dist
    compress
fi

cleanup
wait_for_compress
echo "go generate completed. LLM runners: $(cd ${PAYLOAD_BASE}; echo *)"