From f1ffef2b2c2c45163754b5754dfbd5ec1da66fd3 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Wed, 7 Nov 2018 01:30:52 +0000 Subject: [PATCH] Fix simd instruction alignment on 32bit kernels. Resolves segfaults on i686 kernel. Also execute all available simd instructions, not just the best one. Signed-off-by: Dimitri John Ledkov Signed-off-by: Michael Hudson Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=894774 --- test cases/common/152 simd/simd_sse2.c | 3 +- test cases/common/152 simd/simd_sse3.c | 3 +- test cases/common/152 simd/simd_sse41.c | 3 +- test cases/common/152 simd/simd_sse42.c | 3 +- test cases/common/152 simd/simd_ssse3.c | 3 +- test cases/common/152 simd/simdchecker.c | 55 +++++++++++++----------- test cases/common/152 simd/simdtest.h | 9 ++++ 7 files changed, 49 insertions(+), 30 deletions(-) create mode 100644 test cases/common/152 simd/simdtest.h diff --git a/test cases/common/152 simd/simd_sse2.c b/test cases/common/152 simd/simd_sse2.c index 02745337bc30..5d412fd4141a 100644 --- a/test cases/common/152 simd/simd_sse2.c +++ b/test cases/common/152 simd/simd_sse2.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _MSC_VER int sse2_available() { @@ -21,7 +22,7 @@ int sse2_available() { #endif void increment_sse2(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse3.c b/test cases/common/152 simd/simd_sse3.c index e97d102854a1..4aa47fd32636 100644 --- a/test cases/common/152 simd/simd_sse3.c +++ b/test cases/common/152 simd/simd_sse3.c @@ -1,5 +1,6 @@ #include #include +#include #ifdef _MSC_VER #include @@ -22,7 +23,7 @@ int sse3_available() { #endif void increment_sse3(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse41.c b/test cases/common/152 simd/simd_sse41.c index 0308c7e494ee..466b30c5bef6 100644 --- a/test cases/common/152 simd/simd_sse41.c +++ b/test cases/common/152 simd/simd_sse41.c @@ -1,6 +1,7 @@ #include #include +#include #include #ifdef _MSC_VER @@ -24,7 +25,7 @@ int sse41_available() { #endif void increment_sse41(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_sse42.c b/test cases/common/152 simd/simd_sse42.c index 137ffc441353..26d5ba814d46 100644 --- a/test cases/common/152 simd/simd_sse42.c +++ b/test cases/common/152 simd/simd_sse42.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef _MSC_VER #include @@ -27,7 +28,7 @@ int sse42_available() { #endif void increment_sse42(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simd_ssse3.c b/test cases/common/152 simd/simd_ssse3.c index ab4dff4f819d..d09916522d69 100644 --- a/test cases/common/152 simd/simd_ssse3.c +++ b/test cases/common/152 simd/simd_ssse3.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -30,7 +31,7 @@ int ssse3_available() { #endif void increment_ssse3(float arr[4]) { - double darr[4]; + alignas(16) double darr[4]; __m128d val1 = _mm_set_pd(arr[0], arr[1]); __m128d val2 = _mm_set_pd(arr[2], arr[3]); __m128d one = _mm_set_pd(1.0, 1.0); diff --git a/test cases/common/152 simd/simdchecker.c b/test cases/common/152 simd/simdchecker.c index 222fbf3e31d2..2cf8f14facca 100644 --- a/test cases/common/152 simd/simdchecker.c +++ b/test cases/common/152 simd/simdchecker.c @@ -1,5 +1,7 @@ #include +#include #include +#include /* * A function that checks at runtime which simd accelerations are @@ -8,86 +10,89 @@ */ int main(int argc, char **argv) { - float four[4] = {2.0, 3.0, 4.0, 5.0}; + static const float four_initial[4] = {2.0, 3.0, 4.0, 5.0}; + alignas(16) float four[4]; const float expected[4] = {3.0, 4.0, 5.0, 6.0}; void (*fptr)(float[4]) = NULL; const char *type; - int i; + int i, r=0; /* Add here. The first matched one is used so put "better" instruction * sets at the top. */ #if HAVE_NEON - if(fptr == NULL && neon_available()) { + if(neon_available()) { fptr = increment_neon; type = "NEON"; + #include } #endif #if HAVE_AVX2 - if(fptr == NULL && avx2_available()) { + if(avx2_available()) { fptr = increment_avx2; type = "AVX2"; + #include } #endif #if HAVE_AVX - if(fptr == NULL && avx_available()) { + if(avx_available()) { fptr = increment_avx; type = "AVX"; + #include } #endif #if HAVE_SSE42 - if(fptr == NULL && sse42_available()) { + if(sse42_available()) { fptr = increment_sse42; type = "SSE42"; + #include } #endif #if HAVE_SSE41 - if(fptr == NULL && sse41_available()) { + if(sse41_available()) { fptr = increment_sse41; type = "SSE41"; + #include } #endif #if HAVE_SSSE3 - if(fptr == NULL && ssse3_available()) { + if(ssse3_available()) { fptr = increment_ssse3; type = "SSSE3"; + #include } #endif #if HAVE_SSE3 - if(fptr == NULL && sse3_available()) { + if(sse3_available()) { fptr = increment_sse3; type = "SSE3"; + #include } #endif #if HAVE_SSE2 - if(fptr == NULL && sse2_available()) { + if(sse2_available()) { fptr = increment_sse2; type = "SSE2"; + #include } #endif #if HAVE_SSE - if(fptr == NULL && sse_available()) { + if(sse_available()) { fptr = increment_sse; type = "SSE"; + #include } #endif #if HAVE_MMX - if(fptr == NULL && mmx_available()) { + if(mmx_available()) { fptr = increment_mmx; type = "MMX"; + #include } #endif - if(fptr == NULL) { - fptr = increment_fallback; - type = "fallback"; - } - printf("Using %s.\n", type); - fptr(four); - for(i=0; i<4; i++) { - if(four[i] != expected[i]) { - printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); - return 1; - } - } - return 0; + fptr = increment_fallback; + type = "fallback"; + #include + + return r; } diff --git a/test cases/common/152 simd/simdtest.h b/test cases/common/152 simd/simdtest.h new file mode 100644 index 000000000000..4bf1623af5ac --- /dev/null +++ b/test cases/common/152 simd/simdtest.h @@ -0,0 +1,9 @@ +memcpy(four, four_initial, sizeof(four_initial)); +printf("Using %s.\n", type); +fptr(four); +for(i=0; i<4; i++) { + if(four[i] != expected[i]) { + printf("Increment function failed, got %f expected %f.\n", four[i], expected[i]); + r=1; + } +}