0016-remove-sgemm-global-variables.patch 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
  2. From: jmorganca <jmorganca@gmail.com>
  3. Date: Sun, 9 Feb 2025 17:22:15 -0800
  4. Subject: [PATCH] remove sgemm global variables
  5. Removes the 'iq4nlt' global variable in sgemm.cpp, which causes
  6. a runtime crash when calling dlopen on ggml-cpu libraries because
  7. its initialization depends on AVX instructions that the host machine
  8. may not support.
  9. ---
  10. ggml/src/ggml-cpu/llamafile/sgemm.cpp | 17 +++++++++--------
  11. 1 file changed, 9 insertions(+), 8 deletions(-)
  12. diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
  13. index 8fce576c..3f260ce5 100644
  14. --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
  15. +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
  16. @@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
  17. }
  18. #endif
  19. -////////////////////////////////////////////////////////////////////////////////////////////////////
  20. -// CONSTANTS
  21. -
  22. -#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
  23. -static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
  24. -static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
  25. -#endif
  26. -
  27. ////////////////////////////////////////////////////////////////////////////////////////////////////
  28. // FLOATING POINT MATRIX MULTIPLICATION
  29. @@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
  30. TC *C, int64_t ldc,
  31. int ith, int nth)
  32. : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
  33. + const int8_t kvalues_iq4nl[16] = {
  34. + -127, -104, -83, -65,
  35. + -49, -35, -22, -10,
  36. + 1, 13, 25, 38,
  37. + 53, 69, 89, 113
  38. + };
  39. +
  40. + iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
  41. }
  42. void matmul(int64_t m, int64_t n) {
  43. @@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
  44. const int64_t ldc;
  45. const int ith;
  46. const int nth;
  47. + __m128i iq4nlt;
  48. };
  49. #endif // __AVX__