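Build the VSX real kernels from the shared 128/256/512-bit BLOCK template.

In real_128bit_256bit_512bit_BLOCK_template.c this patch renames the
double-precision load macro from __SIMD_LOAD to _SIMD_LOAD, defines
_SIMD_SUB as vec_sub, and adds VSX_SSE to the VEC_SET conditions that
select ROW_LENGTH (and, where present, STEP_SIZE and UPPER_BOUND).

The real_vsx_4hv and real_vsx_6hv double/single precision wrappers now
include the shared BLOCK template instead of real_vsx_4hv_template.c or
real_vsx_6hv_template.c. The two 4hv wrappers and the 6hv double-precision
wrapper also define and undefine VEC_SET instead of SIMD_SET; the 6hv
single-precision wrapper already used VEC_SET.
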
diff -up mpich/src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c.vsx mpich/src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c
--- mpich/src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c.vsx	2020-05-27 13:16:25.000000000 +0200
+++ mpich/src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c	2020-09-21 15:23:52.106489501 +0200
@@ -184,7 +184,7 @@
 #ifdef DOUBLE_PRECISION_REAL
 #define offset 2
 #define __SIMD_DATATYPE __vector double
-#define __SIMD_LOAD (__vector double) vec_ld
+#define _SIMD_LOAD (__vector double) vec_ld
 #endif
 
 #ifdef SINGLE_PRECISION_REAL
@@ -197,6 +197,7 @@
 #define _SIMD_STORE vec_st
 #define _SIMD_ADD vec_add
 #define _SIMD_MUL vec_mul
+#define _SIMD_SUB vec_sub
 #define _SIMD_SET1 vec_splats
 
 #endif /*  VEC_SET == SPARC64_SSE */
@@ -1629,7 +1630,7 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
 
 
 #undef ROW_LENGTH
-#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128
+#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128
 #ifdef DOUBLE_PRECISION_REAL
 #define ROW_LENGTH 6
 #define STEP_SIZE 6
@@ -1640,7 +1641,7 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
 #define STEP_SIZE 12
 #define UPPER_BOUND 8
 #endif
-#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128 */
+#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128 */
 
 #if  VEC_SET == AVX_256 || VEC_SET == AVX2_256
 #ifdef DOUBLE_PRECISION_REAL
@@ -1680,14 +1681,14 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
 
 
 #undef ROW_LENGTH
-#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128
+#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128
 #ifdef DOUBLE_PRECISION_REAL
 #define ROW_LENGTH 4
 #endif
 #ifdef SINGLE_PRECISION_REAL
 #define ROW_LENGTH 8
 #endif
-#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128 */
+#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128 */
 
 #if  VEC_SET == AVX_256 || VEC_SET == AVX2_256
 #ifdef DOUBLE_PRECISION_REAL
@@ -1715,14 +1716,14 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
     }
 
 #undef ROW_LENGTH
-#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128
+#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128
 #ifdef DOUBLE_PRECISION_REAL
 #define ROW_LENGTH 2
 #endif
 #ifdef SINGLE_PRECISION_REAL
 #define ROW_LENGTH 4
 #endif
-#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128 */
+#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128 */
 
 #if  VEC_SET == AVX_256 || VEC_SET == AVX2_256
 #ifdef DOUBLE_PRECISION_REAL
@@ -1772,7 +1773,7 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
 #ifdef BLOCK6
 
 #undef ROW_LENGTH
-#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128
+#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128
 #ifdef DOUBLE_PRECISION_REAL
 #define ROW_LENGTH 4
 #define STEP_SIZE 4
@@ -1783,7 +1784,7 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
 #define STEP_SIZE 8
 #define UPPER_BOUND 4
 #endif
-#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE  || VEC_SET == NEON_ARCH64_128 */
+#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128 */
 
 #if  VEC_SET == AVX_256 || VEC_SET == AVX2_256
 #ifdef DOUBLE_PRECISION_REAL
@@ -1822,14 +1823,14 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_
       }
 
 #undef ROW_LENGTH
-#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == NEON_ARCH64_128
+#if  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128
 #ifdef DOUBLE_PRECISION_REAL
 #define ROW_LENGTH 2
 #endif
 #ifdef SINGLE_PRECISION_REAL
 #define ROW_LENGTH 4
 #endif
-#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE  || VEC_SET == NEON_ARCH64_128 */
+#endif /*  VEC_SET == SSE_128 || VEC_SET == SPARC64_SSE || VEC_SET == VSX_SSE || VEC_SET == NEON_ARCH64_128 */
 
 #if  VEC_SET == AVX_256 || VEC_SET == AVX2_256
 #ifdef DOUBLE_PRECISION_REAL
diff -up mpich/src/elpa2/kernels/real_vsx_4hv_double_precision.c.vsx mpich/src/elpa2/kernels/real_vsx_4hv_double_precision.c
--- mpich/src/elpa2/kernels/real_vsx_4hv_double_precision.c.vsx	2020-05-27 13:16:25.000000000 +0200
+++ mpich/src/elpa2/kernels/real_vsx_4hv_double_precision.c	2020-09-21 15:15:22.777337971 +0200
@@ -49,11 +49,11 @@
 #define REALCASE 1
 #define DOUBLE_PRECISION 1
 #define BLOCK4 1
-#define SIMD_SET VSX_SSE
+#define VEC_SET VSX_SSE
 #include "../../general/precision_macros.h"
-#include "real_vsx_4hv_template.c"
+#include "real_128bit_256bit_512bit_BLOCK_template.c"
 #undef BLOCK4
-#undef SIMD_SET
+#undef VEC_SET
 #undef REALCASE
 #undef DOUBLE_PRECISION
 
diff -up mpich/src/elpa2/kernels/real_vsx_4hv_single_precision.c.vsx mpich/src/elpa2/kernels/real_vsx_4hv_single_precision.c
--- mpich/src/elpa2/kernels/real_vsx_4hv_single_precision.c.vsx	2020-05-27 13:16:25.000000000 +0200
+++ mpich/src/elpa2/kernels/real_vsx_4hv_single_precision.c	2020-09-21 15:15:22.777337971 +0200
@@ -49,11 +49,11 @@
 #define REALCASE 1
 #define SINGLE_PRECISION 1
 #define BLOCK4 1
-#define SIMD_SET VSX_SSE
+#define VEC_SET VSX_SSE
 #include "../../general/precision_macros.h"
-#include "real_vsx_4hv_template.c"
+#include "real_128bit_256bit_512bit_BLOCK_template.c"
 #undef BLOCK4
-#undef SIMD_SET
+#undef VEC_SET
 #undef REALCASE
 #undef SINGLE_PRECISION
 
diff -up mpich/src/elpa2/kernels/real_vsx_6hv_double_precision.c.vsx mpich/src/elpa2/kernels/real_vsx_6hv_double_precision.c
--- mpich/src/elpa2/kernels/real_vsx_6hv_double_precision.c.vsx	2020-05-27 13:16:25.000000000 +0200
+++ mpich/src/elpa2/kernels/real_vsx_6hv_double_precision.c	2020-09-21 15:15:22.777337971 +0200
@@ -49,11 +49,11 @@
 #define REALCASE 1
 #define DOUBLE_PRECISION 1
 #define BLOCK6 1
-#define SIMD_SET VSX_SSE
+#define VEC_SET VSX_SSE
 #include "../../general/precision_macros.h"
-#include "real_vsx_6hv_template.c"
+#include "real_128bit_256bit_512bit_BLOCK_template.c"
 #undef BLOCK6
-#undef SIMD_SET
+#undef VEC_SET
 #undef REALCASE
 #undef DOUBLE_PRECISION
 
diff -up mpich/src/elpa2/kernels/real_vsx_6hv_single_precision.c.vsx mpich/src/elpa2/kernels/real_vsx_6hv_single_precision.c
--- mpich/src/elpa2/kernels/real_vsx_6hv_single_precision.c.vsx	2020-05-27 13:16:25.000000000 +0200
+++ mpich/src/elpa2/kernels/real_vsx_6hv_single_precision.c	2020-09-21 15:15:22.777337971 +0200
@@ -51,7 +51,7 @@
 #define BLOCK6 1
 #define VEC_SET VSX_SSE
 #include "../../general/precision_macros.h"
-#include "real_vsx_6hv_template.c"
+#include "real_128bit_256bit_512bit_BLOCK_template.c"
 #undef VEC_SET
 #undef BLOCK6
 #undef REALCASE