
ml/backend/ggml: fix crash on dlopen for non-AVX systems (#8976)

Jeffrey Morgan (Sun, 9 Feb 2025)
commit f4711da7bd

+ 55 - 0
llama/patches/0016-remove-sgemm-global-variables.patch

@@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Sun, 9 Feb 2025 17:22:15 -0800
+Subject: [PATCH] remove sgemm global variables
+
+removes the 'iq4nlt' global variable in sgemm.cpp that causes
+a runtime crash when calling dlopen on ggml-cpu libraries, since
+its static initialization uses AVX instructions that the host
+machine may not support
+---
+ ggml/src/ggml-cpu/llamafile/sgemm.cpp | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+index 8fce576c..3f260ce5 100644
+--- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
++++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
+ }
+ #endif
+ 
+-////////////////////////////////////////////////////////////////////////////////////////////////////
+-// CONSTANTS
+-
+-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
+-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
+-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
+-#endif
+-
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+ // FLOATING POINT MATRIX MULTIPLICATION
+ 
+@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
+                     TC *C, int64_t ldc,
+                     int ith, int nth)
+         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
++        const int8_t kvalues_iq4nl[16] = {
++            -127, -104, -83, -65,
++            -49,  -35,  -22, -10,
++              1,   13,   25,  38,
++             53,   69,   89, 113
++        };
++
++        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
+     }
+ 
+     void matmul(int64_t m, int64_t n) {
+@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
+     const int64_t ldc;
+     const int ith;
+     const int nth;
++    __m128i iq4nlt;
+ };
+ #endif // __AVX__
+ 
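
The patch above removes a namespace-scope __m128i whose dynamic initializer runs the moment the shared library is dlopen'ed. Because this variant of ggml-cpu is compiled with AVX enabled, the compiler emits VEX-encoded (AVX) instructions for that initializer, so a host without AVX hits an illegal instruction during dlopen, before any runtime CPU-feature check can intervene. Moving the load into the tinyBLAS_Q0_AVX constructor defers it until a caller that has already verified AVX support constructs the object. A minimal sketch of the broken and fixed patterns (the class name and build flags below are illustrative, not the real build setup):

    // sketch.cpp -- assume this TU is compiled with -mavx and linked into
    // a shared library that may be dlopen'ed on an arbitrary host.
    #include <immintrin.h>
    #include <cstdint>

    // BROKEN: a dynamic initializer at namespace scope executes during
    // dlopen, before any runtime CPU-feature check can run:
    // static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);

    // FIXED: make the value a member and load it in the constructor, which
    // only runs once the caller has decided the host can execute AVX code.
    class QuantKernelSketch {      // hypothetical stand-in for tinyBLAS_Q0_AVX
      public:
        QuantKernelSketch() {
            const int8_t kvalues_iq4nl[16] = {
                -127, -104, -83, -65, -49, -35, -22, -10,
                   1,   13,  25,  38,  53,  69,  89, 113};
            iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
        }

      private:
        __m128i iq4nlt;            // loaded lazily, never at dlopen time
    };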

+ 9 - 8
ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp

@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
 }
 #endif
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION
 
@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
                     TC *C, int64_t ldc,
                     int ith, int nth)
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49,  -35,  -22, -10,
+              1,   13,   25,  38,
+             53,   69,   89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
     }
 
     void matmul(int64_t m, int64_t n) {
@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
     const int64_t ldc;
     const int ith;
     const int nth;
+    __m128i iq4nlt;
 };
 #endif // __AVX__
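
This matters because ggml's backend loader discovers CPU variants by dlopen'ing each candidate library and only afterwards asking how well it matches the host, so every variant must survive being loaded even on a machine that cannot execute its kernels. A hedged sketch of that loading shape (the library names and the exported score symbol are assumptions, not the real ggml interface):

    // loader_sketch.cpp -- illustrative only; build with -ldl on Linux.
    #include <dlfcn.h>
    #include <cstdio>

    int main() {
        const char *variants[] = {"libggml-cpu-avx2.so", "libggml-cpu-avx.so",
                                  "libggml-cpu-base.so"};  // hypothetical names
        for (const char *name : variants) {
            // dlopen runs every static initializer in the library, which is
            // why none of them may execute ISA-specific instructions.
            void *h = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
            if (!h) {
                std::fprintf(stderr, "skip %s: %s\n", name, dlerror());
                continue;
            }
            // Only after loading do we ask whether this variant fits the host.
            using score_fn = int (*)();
            auto score = (score_fn)dlsym(h, "backend_score");  // assumed symbol
            if (score && score() > 0) {
                std::printf("selected %s\n", name);
                return 0;  // keep the handle and use this variant
            }
            dlclose(h);    // unusable on this host; unload it
        }
        return 1;
    }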