#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/

# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCm
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload
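
# Fail fast: -e aborts on the first failing command, -x traces each command as it
# runs, and pipefail makes a pipeline fail if any of its stages fails.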
set -ex
set -o pipefail

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    if [ -n "${AMDGPU_TARGETS}" ]; then
        echo "${AMDGPU_TARGETS}"
        return
    fi
    GPU_LIST=(
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx940"
        "gfx941"
        "gfx942"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    (
        IFS=";"
        echo "'${GPU_LIST[*]}'"
    )
}
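
# amdGPUs emits the target list quoted and semicolon-joined, ready to drop into a
# CMake define, e.g. 'gfx900;gfx906:xnack-;...;gfx1102'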

echo "Starting linux generate script"
if [ -z "${CUDACXX}" ]; then
    if [ -x /usr/local/cuda/bin/nvcc ]; then
        export CUDACXX=/usr/local/cuda/bin/nvcc
    else
        # Try the default location in case it exists
        export CUDACXX=$(command -v nvcc)
    fi
fi
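# CUDACXX may also be set before invoking the script to pin a specific toolkit,
# e.g. (path is illustrative): export CUDACXX=/usr/local/cuda-12.4/bin/nvcc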

COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
source "$(dirname "$0")/gen_common.sh"
init_vars
git_module_setup
apply_patches

init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
        # Static build for linking into the Go binary
        init_vars
        CMAKE_TARGETS="--target llama --target ggml"
        CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="../build/linux/${ARCH}_static"
        echo "Building static library"
        build
    fi

    # Users building from source can tune the exact flags we pass to cmake for
    # configuring llama.cpp; in that case we build only one CPU variant as the default.
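    # Example invocation (flag values are illustrative, not a recommendation):
    #   OLLAMA_CUSTOM_CPU_DEFS="-DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_FMA=on" go generate ./...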
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        init_vars
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="../build/linux/${ARCH}/cpu"
        echo "Building custom CPU"
        build
        compress
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
        # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
        # -DLLAMA_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (no significant improvement over just AVX)
        # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DLLAMA_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
        # -DLLAMA_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
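        # To see which of these extensions the local host supports (illustrative check):
        #   grep -oE 'avx512f|avx2|avx|fma|f16c' /proc/cpuinfo | sort -u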
        COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
        if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
            #
            # CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
            #
            init_vars
            CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
            BUILD_DIR="../build/linux/${ARCH}/cpu"
            echo "Building LCD CPU"
            build
            compress
        fi

        if [ "${ARCH}" == "x86_64" ]; then
            #
            # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
            #
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
                #
                # ~2011 CPU dynamic library with more capabilities turned on to optimize performance
                # Approximately 400% faster than the LCD build on the same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
                echo "Building AVX CPU"
                build
                compress
            fi
            if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
                #
                # ~2013 CPU dynamic library
                # Approximately 10% faster than AVX on the same CPU
                #
                init_vars
                CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
                BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
                echo "Building AVX2 CPU"
                build
                compress
            fi
        fi
    fi
else
    echo "Skipping CPU generation step as requested"
fi

# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi

# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
    CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi
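# e.g. (path is illustrative): CUDART_LIB_DIR=/usr/lib/x86_64-linux-gnu when
# libcudart comes from a distro package rather than the toolkit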

if [ -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
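    # e.g. libcudart.so.12.4.99 splits on '.' so field 3 is "12", and the build
    # lands in .../cuda_v12 (version shown is illustrative)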
    if [ "${ARCH}" == "arm64" ]; then
        echo "ARM CPU detected - disabling unsupported AVX instructions"
        # ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
        #
        # CUDA compute < 6.0 lacks proper FP16 support on ARM.
        # Disabling has minimal performance effect while maintaining compatibility.
        ARM64_DEFS="-DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_CUDA_F16=off"
    fi

    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
        CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
        echo "Building custom CUDA GPU"
    else
        CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DLLAMA_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
    fi
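    # Example (illustrative): OLLAMA_CUSTOM_CUDA_DEFS="-DLLAMA_CUDA_FORCE_MMQ=on"
    # reproduces the default branch's forced-MMQ behavior while leaving room for extra flags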
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
    BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
    EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
    build

    # Carry the CUDA libs as payloads to help reduce dependency burden on users
    #
    # TODO - in the future we may shift to packaging these separately and conditionally
    # downloading them in the install script.
    DEPS="$(ldd "${BUILD_DIR}/bin/ollama_llama_server")"
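    # Each ldd line looks like "libcudart.so.12 => /usr/local/cuda/lib64/... (0x...)";
    # cut takes the soname field and xargs trims the surrounding whitespace.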
    for lib in libcudart.so libcublas.so libcublasLt.so; do
        DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
        if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
            cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/bin/"
        elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
            cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/"
        elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then
            cp -d "${CUDART_LIB_DIR}/${lib}"* "${BUILD_DIR}/bin/"
        else
            # The glob must sit outside the quotes or it never expands
            cp -d "${CUDA_LIB_DIR}/${lib}"* "${BUILD_DIR}/bin/"
        fi
    done
    compress
fi

if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi

if [ -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    if [ -f "${ROCM_PATH}"/lib/librocblas.so.*.*.????? ]; then
        ROCM_VARIANT=_v$(ls "${ROCM_PATH}"/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
    fi
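    # e.g. librocblas.so.4.1.60102 splits on '.' so field 5 is "60102", giving a
    # build dir suffix of _v60102 (version shown is illustrative)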
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
    if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
        CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
        echo "Building custom ROCm GPU"
    fi
    BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
    EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
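    # $ORIGIN in the rpath resolves relative to the binary itself at load time, so
    # the server can find the ROCm payload shipped alongside it.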
    build

    # Record the ROCm dependencies
    rm -f "${BUILD_DIR}/bin/deps.txt"
    touch "${BUILD_DIR}/bin/deps.txt"
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo); do
        echo "${dep}" >>"${BUILD_DIR}/bin/deps.txt"
    done
    # Bomb out if for some reason we didn't get a few deps
    if [ $(wc -l <"${BUILD_DIR}/bin/deps.txt") -lt 8 ]; then
        cat "${BUILD_DIR}/bin/deps.txt"
        echo "ERROR: deps file is unexpectedly short"
        exit 1
    fi
    compress
fi

cleanup
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"