1 year ago · f59c4d03f7
--- a/llm/llama.cpp/generate_linux.go
+++ b/llm/llama.cpp/generate_linux.go
@@ -6,9 +6,10 @@ package llm
 
				 //go:generate -command git-apply git -C ggml apply
			
 
				 //go:generate git-apply ../ggml_patch/0001-add-detokenize-endpoint.patch
			
 
				 //go:generate git-apply ../ggml_patch/0002-34B-model-support.patch
			
 
				-//go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on 
			
 
				+//go:generate git-apply ../ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
			
 
				+//go:generate cmake -S ggml -B ggml/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
			
 
				 //go:generate cmake --build ggml/build/gpu --target server --config Release
			
 
				 
			
 
				 //go:generate git submodule update --force gguf
			
 
				-//go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on 
			
 
				+//go:generate cmake -S gguf -B gguf/build/gpu -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
			
 
				 //go:generate cmake --build gguf/build/gpu --target server --config Release
			
--- a/llm/llama.cpp/ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
+++ b/llm/llama.cpp/ggml_patch/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
@@ -0,0 +1,32 @@
 
				+From 1e3bc523d8053a77df3ac7126a84d0297ee97ef6 Mon Sep 17 00:00:00 2001
			
 
				+From: Kylin <56434533+KyL0N@users.noreply.github.com>
			
 
				+Date: Tue, 22 Aug 2023 15:14:23 +0800
			
 
				+Subject: [PATCH] ggml : support CUDA's half type for aarch64(#1455) (#2670)
			
 
				+
			
 
				+* ggml: support CUDA's half type for aarch64(#1455)
			
 
				+support CUDA's half type for aarch64 in ggml_fp16_t definition
			
 
				+
			
 
				+* ggml: use __CUDACC__ to recognise nvcc compiler
			
 
				+---
			
 
				+ ggml.h | 5 +++--
			
 
				+ 1 file changed, 3 insertions(+), 2 deletions(-)
			
 
				+
			
 
				+diff --git a/ggml.h b/ggml.h
			
 
				+index 544ad2d..0ec7ec5 100644
			
 
				+--- a/ggml.h
			
 
				++++ b/ggml.h
			
 
				+@@ -259,8 +259,9 @@
			
 
				+ extern "C" {
			
 
				+ #endif
			
 
				+ 
			
 
				+-#ifdef __ARM_NEON
			
 
				+-    // we use the built-in 16-bit float type
			
 
				++#if defined(__ARM_NEON) && defined(__CUDACC__)
			
 
				++    typedef half ggml_fp16_t;
			
 
				++#elif defined(__ARM_NEON)
			
 
				+     typedef __fp16 ggml_fp16_t;
			
 
				+ #else
			
 
				+     typedef uint16_t ggml_fp16_t;
			
 
				+-- 
			
 
				+2.39.2 (Apple Git-143)
			
 
				+