#!/bin/bash
# This script is intended to run inside `go generate`.
# The working directory must be llm/generate/.
#
# First we build our default built-in library, which is linked into the CGO
# binary as a normal dependency. This default build is CPU based.
#
# Then we build a CUDA dynamic library (although statically linked with the
# CUDA library dependencies for maximum portability).
#
# Then, if we detect ROCm, we build a dynamically loaded ROCm library. ROCm in
# particular must remain a dynamic library even if it's the only GPU library
# detected, because we can't redistribute its object files and must rely on
# the ROCm shared libraries being present at runtime; if they are missing,
# the server would fail to start.
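#
# For reference, a sketch of the build outputs produced below (which variants
# appear depends on the toolkits detected on the build machine):
#   ${LLAMACPP_DIR}/build/linux/cpu       - lowest common denominator CPU build
#   ${LLAMACPP_DIR}/build/linux/cpu_avx   - AVX-enabled CPU build
#   ${LLAMACPP_DIR}/build/linux/cpu_avx2  - AVX2/FMA/F16C CPU build
#   ${LLAMACPP_DIR}/build/linux/cuda_v<N> - CUDA build, N = CUDA major version
#   ${LLAMACPP_DIR}/build/linux/rocm_v<N> - ROCm build, N = rocm_smi64 so version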
set -ex
set -o pipefail

# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
    GPU_LIST=(
        "gfx803"
        "gfx900"
        "gfx906:xnack-"
        "gfx908:xnack-"
        "gfx90a:xnack+"
        "gfx90a:xnack-"
        "gfx1010"
        "gfx1012"
        "gfx1030"
        "gfx1100"
        "gfx1101"
        "gfx1102"
    )
    (
        IFS=$';'
        echo "'${GPU_LIST[*]}'"
    )
}
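# Usage sketch: amdGPUs emits the list as a single quoted, semicolon-separated
# string suitable for a CMake list value, e.g. (middle entries elided here):
#   $ amdGPUs
#   'gfx803;gfx900;gfx906:xnack-;...;gfx1102'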
  37. echo "Starting linux generate script"
  38. if [ -z "${CUDACXX}" ]; then
  39. if [ -x /usr/local/cuda/bin/nvcc ]; then
  40. export CUDACXX=/usr/local/cuda/bin/nvcc
  41. else
  42. # Try the default location in case it exists
  43. export CUDACXX=$(command -v nvcc)
  44. fi
  45. fi
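# Illustrative override (an assumption about your setup, not required): a
# nonstandard toolkit can be selected by exporting CUDACXX first, e.g.
#   CUDACXX=/usr/local/cuda-12.4/bin/nvcc go generate ./...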
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
    # Users building from source can tune the exact flags we pass to cmake
    # when configuring llama.cpp; in that case we build only one CPU variant,
    # which serves as the default.
    if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
        echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
        CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
        echo "Building custom CPU"
        build
        install
        link_server_lib
    else
        # Darwin Rosetta x86 emulation does NOT support AVX, AVX2, or AVX512.
        # -DLLAMA_AVX -- 2011 Intel Sandy Bridge & 2011 AMD Bulldozer
        # -DLLAMA_F16C -- 2012 Intel Ivy Bridge & 2011 AMD Bulldozer (no significant improvement over AVX alone)
        # -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
        # -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
        # Note: the following seem to yield slower results than AVX2 - ymmv
        # -DLLAMA_AVX512 -- 2017 Intel Skylake and High-End Desktop (HEDT)
        # -DLLAMA_AVX512_VBMI -- 2018 Intel Cannon Lake
        # -DLLAMA_AVX512_VNNI -- 2021 Intel Alder Lake
        # (See the flag check sketched just below for mapping these to a CPU.)
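        # An illustrative way to see which of these features the build host
        # supports (the names here match the flags line of /proc/cpuinfo):
        #   grep -m1 -wo 'avx512f\|avx2\|avx\|fma\|f16c' /proc/cpuinfo | sort -u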
        COMMON_CPU_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off"
        #
        # CPU first for the default library, set up as the lowest common
        # denominator for maximum compatibility (including Rosetta)
        #
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
        echo "Building LCD CPU"
        build
        install
        link_server_lib
        #
        # ~2011 CPU dynamic library with more capabilities turned on to optimize performance
        # Approximately 400% faster than the LCD build on the same CPU
        #
        init_vars
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx"
        echo "Building AVX CPU"
        build
        install
        link_server_lib
        #
        # ~2013 CPU dynamic library
        # Approximately 10% faster than the AVX-only build on the same CPU
        #
        init_vars
        CMAKE_DEFS="${COMMON_CPU_DEFS} -DLLAMA_AVX=on -DLLAMA_AVX2=on -DLLAMA_AVX512=off -DLLAMA_FMA=on -DLLAMA_F16C=on ${CMAKE_DEFS}"
        BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu_avx2"
        echo "Building AVX2 CPU"
        build
        install
        link_server_lib
    fi
else
    echo "Skipping CPU generation step as requested"
fi
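# Note (describing intent, not verified by this script): the cpu, cpu_avx, and
# cpu_avx2 builds above are alternative payloads for the same functionality;
# the expectation is that the server picks the most capable variant the host
# CPU supports at runtime.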
# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
    CUDA_LIB_DIR=/usr/local/cuda/lib64
fi

# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
    CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi
if [ -d "${CUDA_LIB_DIR}" ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
    if [ -n "${CUDA_MAJOR}" ]; then
        CUDA_VARIANT=_v${CUDA_MAJOR}
    fi
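    # Illustrative parse: with CUDA_LIB_DIR=/usr/local/cuda/lib64 and a file
    # named libcudart.so.12.2.140, splitting the path on '.' yields field 3 =
    # "12", so CUDA_VARIANT becomes "_v12" and the build lands in .../cuda_v12.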
    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
    build
    install
    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
        -Wl,--whole-archive \
        ${BUILD_DIR}/lib/libext_server.a \
        ${BUILD_DIR}/lib/libcommon.a \
        ${BUILD_DIR}/lib/libllama.a \
        -Wl,--no-whole-archive \
        ${CUDA_LIB_DIR}/libcudart_static.a \
        ${CUDA_LIB_DIR}/libcublas_static.a \
        ${CUDA_LIB_DIR}/libcublasLt_static.a \
        ${CUDA_LIB_DIR}/libcudadevrt.a \
        ${CUDA_LIB_DIR}/libculibos.a \
        -lcuda \
        -lrt -lpthread -ldl -lstdc++ -lm
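    # Sanity-check sketch (not run by this script): the CUDA runtime is linked
    # statically above, so only the driver library should remain a dynamic
    # dependency. For example,
    #   ldd ${BUILD_DIR}/lib/libext_server.so | grep -i cuda
    # should list libcuda.so but no libcudart/libcublas entries.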
fi
if [ -z "${ROCM_PATH}" ]; then
    # Try the default location in case it exists
    ROCM_PATH=/opt/rocm
fi

if [ -z "${CLBlast_DIR}" ]; then
    # Try the default location in case it exists
    if [ -d /usr/lib/cmake/CLBlast ]; then
        export CLBlast_DIR=/usr/lib/cmake/CLBlast
    fi
fi
if [ -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    if [ -f ${ROCM_PATH}/lib/librocm_smi64.so.? ]; then
        ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocm_smi64.so.? | cut -f3 -d. || true)
    fi
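    # Illustrative parse: /opt/rocm/lib/librocm_smi64.so.5 splits on '.' into
    # field 3 = "5", so ROCM_VARIANT becomes "_v5" (build dir .../rocm_v5).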
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
    BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm${ROCM_VARIANT}"
    build
    install
    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
        -Wl,--whole-archive \
        ${BUILD_DIR}/lib/libext_server.a \
        ${BUILD_DIR}/lib/libcommon.a \
        ${BUILD_DIR}/lib/libllama.a \
        -Wl,--no-whole-archive \
        -lrt -lpthread -ldl -lstdc++ -lm \
        -L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
        -Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
        -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
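    # Unlike the CUDA build, the ROCm libraries above stay dynamically linked
    # (note the -l flags plus -rpath into /opt/rocm/lib), matching the
    # redistribution constraint described at the top of this script. An
    # illustrative check:
    #   ldd ${BUILD_DIR}/lib/libext_server.so | grep -E 'rocblas|hipblas'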
fi

cleanup