diff --git a/llama/patches/0016-remove-sgemm-global-variables.patch b/llama/patches/0016-remove-sgemm-global-variables.patch
new file mode 100644
index 000000000..31a59aeaf
--- /dev/null
+++ b/llama/patches/0016-remove-sgemm-global-variables.patch
@@ -0,0 +1,55 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca
+Date: Sun, 9 Feb 2025 17:22:15 -0800
+Subject: [PATCH] remove sgemm global variables
+
+removes the 'iq4nlt' global variable in sgemm.cpp that causes
+a runtime crash when calling dlopen on ggml-cpu libraries as
+its initialization depends on AVX instructions the host machine
+may not have
+---
+ ggml/src/ggml-cpu/llamafile/sgemm.cpp | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+index 8fce576c..3f260ce5 100644
+--- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp
++++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
+ }
+ #endif
+ 
+-////////////////////////////////////////////////////////////////////////////////////////////////////
+-// CONSTANTS
+-
+-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
+-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
+-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
+-#endif
+-
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+ // FLOATING POINT MATRIX MULTIPLICATION
+ 
+@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
+                     TC *C, int64_t ldc,
+                     int ith, int nth)
+         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
++        const int8_t kvalues_iq4nl[16] = {
++            -127, -104, -83, -65,
++            -49, -35, -22, -10,
++            1, 13, 25, 38,
++            53, 69, 89, 113
++        };
++
++        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
+     }
+ 
+     void matmul(int64_t m, int64_t n) {
+@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
+     const int64_t ldc;
+     const int ith;
+     const int nth;
++    __m128i iq4nlt;
+ };
+ #endif // __AVX__
+ 
diff --git a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
index 8fce576c3..3f260ce5a 100644
--- a/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-cpu/llamafile/sgemm.cpp
@@ -279,14 +279,6 @@ template <> inline __m256bh load(const float *p) {
 }
 #endif
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// CONSTANTS
-
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
-#endif
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // FLOATING POINT MATRIX MULTIPLICATION
 
@@ -613,6 +605,14 @@ class tinyBLAS_Q0_AVX {
                     TC *C, int64_t ldc,
                     int ith, int nth)
         : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
+        const int8_t kvalues_iq4nl[16] = {
+            -127, -104, -83, -65,
+            -49, -35, -22, -10,
+            1, 13, 25, 38,
+            53, 69, 89, 113
+        };
+
+        iq4nlt = _mm_loadu_si128((const __m128i *)kvalues_iq4nl);
     }
 
     void matmul(int64_t m, int64_t n) {
@@ -1037,6 +1037,7 @@ class tinyBLAS_Q0_AVX {
     const int64_t ldc;
     const int ith;
     const int nth;
+    __m128i iq4nlt;
 };
 #endif // __AVX__
 
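Note (not part of the patch above): the commit message's crash happens because a namespace-scope __m128i initialized with an intrinsic is a dynamic initializer. When sgemm.cpp is built with AVX flags, that initializer runs the moment the ggml-cpu shared library is dlopen'ed, before any runtime CPU-feature check, so a host without AVX faults immediately. Moving the load into the tinyBLAS_Q0_AVX constructor defers it until that kernel is actually selected. The following minimal sketch illustrates the two patterns; the file and struct names are hypothetical and not from the source.

// sketch.cpp (hypothetical) - illustrates the failure mode, not Ollama/ggml code.
// Build with AVX enabled, e.g.: g++ -mavx -shared -fPIC sketch.cpp -o sketch.so
#include <immintrin.h>
#include <cstdint>

static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10,
                                         1,    13,   25,  38,  53,  69,  89,  113};

// Problematic pattern: a dynamic initializer at namespace scope. With -mavx the
// compiler typically emits a VEX-encoded load (vmovdqu) here, and the initializer
// runs at dlopen time, so a machine without AVX raises SIGILL before any code
// path that could have checked CPU features is reached.
static const __m128i iq4nlt_global = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);

// Safer pattern (what the patch does): keep the vector as a member and load it in
// the constructor, so the AVX-encoded instruction only executes once this kernel
// has actually been chosen at runtime.
struct Q0KernelSketch {
    __m128i iq4nlt;
    Q0KernelSketch() {
        iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
    }
};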