# gen_linux.sh
  1. #!/bin/bash
  2. # This script is intended to run inside the go generate
  3. # working directory must be llm/generate/
  4. # First we build one or more CPU based LLM libraries
  5. #
  6. # Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
  7. # library dependencies
  8. #
  9. # Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCM
  10. # libraries are quite large, and also dynamically load data files at runtime
  11. # which in turn are large, so we don't attempt to cary them as payload
  12. set -ex
  13. set -o pipefail
  14. # See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
  15. amdGPUs() {
  16. GPU_LIST=(
  17. "gfx803"
  18. "gfx900"
  19. "gfx906:xnack-"
  20. "gfx908:xnack-"
  21. "gfx90a:xnack+"
  22. "gfx90a:xnack-"
  23. "gfx1010"
  24. "gfx1012"
  25. "gfx1030"
  26. "gfx1100"
  27. "gfx1101"
  28. "gfx1102"
  29. )
  30. (
  31. IFS=$';'
  32. echo "'${GPU_LIST[*]}'"
  33. )
  34. }
  35. echo "Starting linux generate script"
  36. if [ -z "${CUDACXX}" ]; then
  37. if [ -x /usr/local/cuda/bin/nvcc ]; then
  38. export CUDACXX=/usr/local/cuda/bin/nvcc
  39. else
  40. # Try the default location in case it exists
  41. export CUDACXX=$(command -v nvcc)
  42. fi
  43. fi
  44. COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
  45. source $(dirname $0)/gen_common.sh
  46. init_vars
  47. git_module_setup
  48. apply_patches
# Build the CPU-only variants unless the caller opted out.
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# Users building from source can tune the exact flags we pass to cmake for configuring
# llama.cpp, and we'll build only 1 CPU variant in that case as the default.
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
echo "Building custom CPU"
build
compress_libs
else
# Otherwise build three variants at increasing instruction-set levels.
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
# -DLLAMA_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
# Note: the following seem to yield slower results than AVX2 - ymmv
# -DLLAMA_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
# -DLLAMA_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu"
echo "Building LCD CPU"
build
compress_libs
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
# init_vars resets CMAKE_DEFS/BUILD_DIR between variants -- do not remove.
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx"
echo "Building AVX CPU"
build
compress_libs
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU"
build
compress_libs
fi
else
echo "Skipping CPU generation step as requested"
fi
  102. if [ -z "${CUDA_LIB_DIR}" ]; then
  103. # Try the default location in case it exists
  104. CUDA_LIB_DIR=/usr/local/cuda/lib64
  105. fi
  106. if [ -d "${CUDA_LIB_DIR}" ]; then
  107. echo "CUDA libraries detected - building dynamic CUDA library"
  108. init_vars
  109. CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
  110. if [ -n "${CUDA_MAJOR}" ]; then
  111. CUDA_VARIANT=_v${CUDA_MAJOR}
  112. fi
  113. CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
  114. BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/cuda${CUDA_VARIANT}"
  115. EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
  116. build
  117. # Cary the CUDA libs as payloads to help reduce dependency burden on users
  118. #
  119. # TODO - in the future we may shift to packaging these separately and conditionally
  120. # downloading them in the install script.
  121. DEPS="$(ldd ${BUILD_DIR}/lib/libext_server.so )"
  122. for lib in libcudart.so libcublas.so libcublasLt.so ; do
  123. DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
  124. if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
  125. cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/lib/"
  126. elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
  127. cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/lib/"
  128. else
  129. cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/lib/"
  130. fi
  131. done
  132. compress_libs
  133. fi
  134. if [ -z "${ROCM_PATH}" ]; then
  135. # Try the default location in case it exists
  136. ROCM_PATH=/opt/rocm
  137. fi
  138. if [ -z "${CLBlast_DIR}" ]; then
  139. # Try the default location in case it exists
  140. if [ -d /usr/lib/cmake/CLBlast ]; then
  141. export CLBlast_DIR=/usr/lib/cmake/CLBlast
  142. fi
  143. fi
# Build the ROCm variant when a ROCm install is present on the host.
if [ -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library"
# Tag the build dir with the rocm_smi soname version (e.g. _v5).
# NOTE(review): the '?' glob inside '[ -f ... ]' works only while exactly one
# file matches; multiple matches would make the test error out -- confirm.
if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
fi
init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
BUILD_DIR="${LLAMACPP_DIR}/build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
# Note: the ROCM libs and runtime library files are too large to embed, so we depend on
# them being present at runtime on the host
compress_libs
fi
# Remove patches / restore the submodule tree (defined in gen_common.sh).
cleanup