#!/bin/bash
# build_hipblas.sh
#
# Compiles the ggml sources with hipcc and links them into one shared
# library. This first section picks the GPU offload targets and the
# platform-specific output name and compiler flags.

# GPU targets built on every platform.
archs=(
  gfx900
  gfx940
  gfx941
  gfx942
  gfx1010
  gfx1012
  gfx1030
  gfx1100
  gfx1101
  gfx1102
)

# Targets with an explicit xnack setting; only appended on the non-Windows
# branch below.
linux_archs=(
  gfx906:xnack-
  gfx908:xnack-
  gfx90a:xnack+
  gfx90a:xnack-
)

os="$(uname -s)"

# additional_flags is kept as one space-separated string (not an array):
# it is expanded unquoted where hipcc is invoked so that it word-splits
# into individual options.
additional_flags=""
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
  output="ggml-hipblas.dll"
  additional_flags=" -Xclang --dependent-lib=msvcrt"
else
  output="libggml-hipblas.so"
  archs+=("${linux_archs[@]}")
fi

# One --offload-arch option per selected target.
for arch in "${archs[@]}"; do
  additional_flags+=" --offload-arch=$arch"
done
  # Collect every source file to compile. The patterns are expanded by the
  # shell directly inside the array literal, so each matching path becomes
  # exactly one element (no subshell, no re-splitting of paths on whitespace).
  # A pattern that matches nothing stays in the list as literal text, so the
  # compiler reports the missing input instead of it being dropped silently.
  sources=(
    ggml-cuda/template-instances/fattn-wmma*.cu
    ggml-cuda/template-instances/mmq*.cu
    ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu
    ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu
    ggml-cuda/template-instances/fattn-vec*f16-f16.cu
    ggml-cuda.cu
    ggml-cuda/*.cu
    ggml.c
    ggml-backend.c
    ggml-alloc.c
    ggml-quants.c
    sgemm.cpp
  )
  # Compile a single source file to an object file with hipcc.
  # Globals:   additional_flags (read) - space-separated extra hipcc options;
  #            expanded unquoted on purpose so it splits into separate words.
  # Arguments: $1 - path of the source file to compile
  # Outputs:   the object is written next to the source: a trailing ".cu" is
  #            replaced by ".o"; any other name simply gets ".o" appended
  #            (e.g. ggml.c -> ggml.c.o).
  # Returns:   the exit status of hipcc
  compile_source() {
    local src="$1"

    # NOTE(review): -DCMAKE_POSITION_INDEPENDENT_CODE=on only defines a
    # preprocessor macro; confirm whether an actual PIC option is needed.
    local -a defines=(
      -DGGML_USE_CUDA -DGGML_BUILD=1 -DGGML_SHARED=1
      -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1
      -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_HIPBLAS -DGGML_USE_LLAMAFILE
      -DHIP_FAST_MATH -DNDEBUG
      -DK_QUANTS_PER_ITERATION=2 -D_CRT_SECURE_NO_WARNINGS
      -DCMAKE_POSITION_INDEPENDENT_CODE=on
      -D_GNU_SOURCE
    )
    local -a warnings=(
      -Wno-expansion-to-defined -Wno-invalid-noreturn -Wno-ignored-attributes
      -Wno-pass-failed -Wno-deprecated-declarations -Wno-unused-result
    )

    # shellcheck disable=SC2086 # additional_flags must word-split
    hipcc -c -O3 "${defines[@]}" "${warnings[@]}" -I. \
      $additional_flags -o "${src%.cu}.o" "$src"
  }
  # SIGINT handler: announce the shutdown, then signal the whole process
  # group. `kill 0` sends the default signal (TERM) to every process in the
  # current process group, which normally includes this script itself as well
  # as the compile jobs it started in the background.
  cleanup() {
    echo "Terminating all background processes..."
    kill 0
  }

  # Run cleanup when the script receives SIGINT (Ctrl+C).
  trap cleanup SIGINT
  # Compile every entry of the global `sources` array in parallel, keeping
  # roughly one job per CPU in flight.
  # Globals:   sources (read); max_jobs, job_count (written)
  # Outputs:   prints each source path to stdout as its job is started;
  #            lists the sources whose compilation failed on stderr
  # Returns:   0 if every compile_source call succeeded, 1 otherwise
  compile_all() {
    local src fail_log rc=0
    # Each failing job appends its source path here. This does not depend on
    # which job's status a given `wait` happens to report.
    fail_log=$(mktemp) || return 1

    # Limit the number of concurrent jobs. Fall back to getconf, then to a
    # single job, so a missing nproc cannot leave the limit empty.
    max_jobs=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null)
    if ! [[ "$max_jobs" =~ ^[1-9][0-9]*$ ]]; then
      max_jobs=1
    fi
    job_count=0

    for src in "${sources[@]}"; do
      echo "$src"
      { compile_source "$src" || printf '%s\n' "$src" >>"$fail_log"; } &
      job_count=$((job_count + 1))
      if [[ $job_count -ge $max_jobs ]]; then
        # Block until one running job finishes before starting another.
        wait -n
        job_count=$((job_count - 1))
      fi
    done
    # Barrier: wait for all remaining jobs.
    wait

    if [[ -s "$fail_log" ]]; then
      echo "Compilation failed for:" >&2
      cat "$fail_log" >&2
      rc=1
    fi
    rm -f -- "$fail_log"
    return "$rc"
  }

  # Stop here on any compile error instead of continuing with a partial
  # set of object files.
  compile_all || exit 1
  # Link all object files into the shared library named by $output.
  echo "Linking object files..."
  link_status=0
  # The *.o patterns are left unquoted on purpose so the shell expands them.
  hipcc -v -shared -o "$output" \
    *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o \
    -lhipblas -lamdhip64 -lrocblas || link_status=$?

  # Clean up object files after linking (done whether or not the link worked).
  rm -f -- *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o

  # Surface a failed link through the exit status; otherwise the status of
  # the rm above would hide it.
  if [[ $link_status -ne 0 ]]; then
    echo "Linking failed (hipcc exit status $link_status)." >&2
    exit "$link_status"
  fi