123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- #!/bin/bash
- archs=(
- gfx900
- gfx940
- gfx941
- gfx942
- gfx1010
- gfx1012
- gfx1030
- gfx1100
- gfx1101
- gfx1102
- )
- linux_archs=(
- gfx906:xnack-
- gfx908:xnack-
- gfx90a:xnack+
- gfx90a:xnack-
- )
- os="$(uname -s)"
- additional_flags=""
- if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
- output="ggml-hipblas.dll"
- additional_flags=" -Xclang --dependent-lib=msvcrt"
- else
- output="libggml-hipblas.so"
- archs+=("${linux_archs[@]}")
- fi
- for arch in "${archs[@]}"; do
- additional_flags+=" --offload-arch=$arch"
- done
- # Create an array of all source files, expanding globs
- sources=(
- $(echo ggml-cuda/template-instances/fattn-wmma*.cu)
- $(echo ggml-cuda/template-instances/mmq*.cu)
- $(echo ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu)
- $(echo ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu)
- $(echo ggml-cuda/template-instances/fattn-vec*f16-f16.cu)
- ggml-cuda.cu
- $(echo ggml-cuda/*.cu)
- ggml.c
- ggml-backend.c
- ggml-alloc.c
- ggml-quants.c
- sgemm.cpp
- )
- # Function to compile a single source file
- compile_source() {
- src="$1"
- hipcc -c -O3 -DGGML_USE_CUDA -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 \
- -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_HIPBLAS -DGGML_USE_LLAMAFILE -DHIP_FAST_MATH -DNDEBUG \
- -DK_QUANTS_PER_ITERATION=2 -D_CRT_SECURE_NO_WARNINGS -DCMAKE_POSITION_INDEPENDENT_CODE=on \
- -D_GNU_SOURCE -Wno-expansion-to-defined -Wno-invalid-noreturn -Wno-ignored-attributes -Wno-pass-failed \
- -Wno-deprecated-declarations -Wno-unused-result -I. \
- $additional_flags -o "${src%.cu}.o" "$src"
- }
- # Function to handle Ctrl+C
- cleanup() {
- echo "Terminating all background processes..."
- kill 0
- }
- # Set trap to handle SIGINT (Ctrl+C)
- trap cleanup SIGINT
- # Limit the number of concurrent jobs
- max_jobs=$(nproc)
- job_count=0
- for src in "${sources[@]}"; do
- echo "$src"
- compile_source "$src" &
- job_count=$((job_count + 1))
- if [[ $job_count -ge $max_jobs ]]; then
- wait -n
- job_count=$((job_count - 1))
- fi
- done
- wait
- # Link all object files into a shared library
- echo "Linking object files..."
- hipcc -v -shared -o $output *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o -lhipblas -lamdhip64 -lrocblas
- # Clean up object files after linking
- rm -f *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o
|