Open
Description
Git commit
git clone https://github.com/ggml-org/llama.cpp
Operating systems
Linux
GGML backends
CUDA
Problem description & steps to reproduce
I'm trying to compile llama.cpp with CUDA support on SLES 15.6. I got the following errors.
~/llama.cpp> uname -m
x86_64
It is strange that the build keeps compiling the aarch64 source file (ggml-cpu-aarch64.cpp), even though I have Intel CPUs.
The persistent errors in ggml-cpu-aarch64.cpp (e.g., _mm256_set_m128 and _mm256_set_m128i not declared) indicate that the ARM-specific code is still being compiled despite your x86_64 system and the CMakeLists.txt modification to exclude it. Since you’ve confirmed that GCC 12.4.0 supports these intrinsics and you’re building from source in ~/llama.cpp, the issue is likely due to a build system bug.
First Bad Commit
This is my first installation.
Compile command
cmake -B build -DGGML_CUDA=ON
cmake --build build --config Release
Relevant log output
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp: In function ‘void ggml_gemm_q4_0_8x8_q8_0(int, float*, size_t, const void*, const void*, int, int)’:
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: error: ‘_mm256_set_m128i’ was not declared in this scope
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:3177:50: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: note: suggested alternative: ‘_mm256_set_epi8’
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:3177:50: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: error: ‘_mm256_set_m128i’ was not declared in this scope
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:3364:50: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: note: suggested alternative: ‘_mm256_set_epi8’
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:3364:50: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp: In function ‘void ggml_gemm_q4_K_8x8_q8_K(int, float*, size_t, const void*, const void*, int, int)’:
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: error: ‘_mm256_set_m128i’ was not declared in this scope
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4091:46: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: note: suggested alternative: ‘_mm256_set_epi8’
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4091:46: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4402:58: error: ‘_mm256_set_m128’ was not declared in this scope
const __m256 row_scale_f32_ymm = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse);
^~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4402:58: note: suggested alternative: ‘_mm256_set_epi8’
const __m256 row_scale_f32_ymm = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse);
^~~~~~~~~~~~~~~
_mm256_set_epi8
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: error: ‘_mm256_set_m128i’ was not declared in this scope
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4454:46: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:103:54: note: suggested alternative: ‘_mm256_set_epi8’
#define GGML_F32Cx8x2_LOAD(x, y) _mm512_cvtph_ps(_mm256_set_m128i(_mm_loadu_si128((const __m128i *)(y)) , _mm_loadu_si128((const __m128i *)(x))))
^
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4454:46: note: in expansion of macro ‘GGML_F32Cx8x2_LOAD’
const __m512 col_scale_f32 = GGML_F32Cx8x2_LOAD(b_ptr_0[b].d, b_ptr_1[b].d);
^~~~~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4765:54: error: ‘_mm256_set_m128’ was not declared in this scope
const __m256 row_scale_f32_ymm = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse);
^~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:4765:54: note: suggested alternative: ‘_mm256_set_epi8’
const __m256 row_scale_f32_ymm = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse);
^~~~~~~~~~~~~~~
_mm256_set_epi8
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:5111:54: error: ‘_mm256_set_m128’ was not declared in this scope
const __m256 row_scale_f32 = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse); //GGML_F32Cx8_REPEAT_LOAD(a_ptrs[rp][b].d, loadMask);
^~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:5111:54: note: suggested alternative: ‘_mm256_set_epi8’
const __m256 row_scale_f32 = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse); //GGML_F32Cx8_REPEAT_LOAD(a_ptrs[rp][b].d, loadMask);
^~~~~~~~~~~~~~~
_mm256_set_epi8
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:5441:50: error: ‘_mm256_set_m128’ was not declared in this scope
const __m256 row_scale_f32 = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse); //GGML_F32Cx8_REPEAT_LOAD(a_ptrs[rp][b].d, loadMask);
^~~~~~~~~~~~~~~
/mnt/data/home/johnl/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp:5441:50: note: suggested alternative: ‘_mm256_set_epi8’
const __m256 row_scale_f32 = _mm256_set_m128(row_scale_f32_sse, row_scale_f32_sse); //GGML_F32Cx8_REPEAT_LOAD(a_ptrs[rp][b].d, loadMask);
^~~~~~~~~~~~~~~
_mm256_set_epi8
gmake[2]: *** [ggml/src/CMakeFiles/ggml-cpu.dir/build.make:104: ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu-aarch64.cpp.o] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:1751: ggml/src/CMakeFiles/ggml-cpu.dir/all] Error 2
gmake: *** [Makefile:146: all] Error 2