瀏覽代碼

cuda: enable flash attention

ggml added an option to disable flash attention, so explicitly enable it
Michael Yang 2 月之前
父節點
當前提交
b42aba40ed
共有 1 個文件被更改,包括 1 次插入和 0 次刪除
  1. 1 0
      CMakeLists.txt

+ 1 - 0
CMakeLists.txt

@@ -23,6 +23,7 @@ set(GGML_SCHED_MAX_COPIES 4)
 set(GGML_LLAMAFILE ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_CUDA_GRAPHS ON)
+set(GGML_CUDA_FA ON)
 
 if((CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
     OR (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM64|ARMv[0-9]+"))