From febfe9a6490052d4b8789fd8f7a0342115bfd55e Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Sun, 29 Mar 2020 23:13:21 +0200 Subject: Enable padding over Z dimension --- src/Core/CMakeLists.txt | 10 ++++++---- src/Core/regularisers_CPU/TNV_core.c | 4 ++-- src/Core/regularisers_CPU/TNV_core_backtrack.c | 4 ++-- src/Core/regularisers_CPU/TNV_core_backtrack_loop.h | 4 ++-- src/Core/regularisers_CPU/TNV_core_loop.h | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt index 76b0f3e..9a53b67 100644 --- a/src/Core/CMakeLists.txt +++ b/src/Core/CMakeLists.txt @@ -60,15 +60,17 @@ message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}") message("Adding regularisers as a shared library") #set(CMAKE_C_COMPILER /opt/intel/compilers_and_libraries/linux/bin/intel64/icc) -#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -xSSE4.2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp -g") -#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -axAVX2 -xAVX2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp -g") -#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp -g") +#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -xSSE4.2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp") +#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -axAVX2 -xAVX2 -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp") +#set(CMAKE_C_FLAGS "-Ofast -mtune=sandybridge -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -qopt-report=5 -qopt-report-file=stdout -qopt-report-phase=vec -qopenmp") #set(CMAKE_C_COMPILER clang) #set(CMAKE_C_FLAGS "-march=nocona -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -ftree-vectorize -fopenmp") +#set(CMAKE_C_COMPILER gcc-9) +set(CMAKE_C_FLAGS "-march=native 
-ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -fopenmp") #set(CMAKE_C_FLAGS "-march=nocona -msse -msse2 -msse3 -mssse3 -msse4 -msse4.1 -msse4.2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=128 -fopenmp") -set(CMAKE_C_FLAGS "-march=native -mavx2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp") +#set(CMAKE_C_FLAGS "-march=native -mavx2 -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp") #set(CMAKE_C_FLAGS "-march=native -mavx512f -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512vl -ftree-vectorize -fopt-info-vec-optimized -fopt-info-vec -mprefer-vector-width=512 -fopenmp") #set(CMAKE_C_FLAGS_RELEASE "-g -gdwarf-2 -g3 -fno-omit-frame-pointer") diff --git a/src/Core/regularisers_CPU/TNV_core.c b/src/Core/regularisers_CPU/TNV_core.c index 415c644..7b8023b 100755 --- a/src/Core/regularisers_CPU/TNV_core.c +++ b/src/Core/regularisers_CPU/TNV_core.c @@ -460,9 +460,9 @@ static void TNV_CPU_init(float *InputT, float *uT, int dimX, int dimY, int dimZ) tnv_ctx.dimY = dimY; tnv_ctx.dimZ = dimZ; // Padding seems actually slower - tnv_ctx.padZ = dimZ; +// tnv_ctx.padZ = dimZ; // tnv_ctx.padZ = 4 * ((dimZ / 4) + ((dimZ % 4)?1:0)); -// tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0)); + tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0)); hw_sched_init(); diff --git a/src/Core/regularisers_CPU/TNV_core_backtrack.c b/src/Core/regularisers_CPU/TNV_core_backtrack.c index 9b19ed5..7eb367e 100755 --- a/src/Core/regularisers_CPU/TNV_core_backtrack.c +++ b/src/Core/regularisers_CPU/TNV_core_backtrack.c @@ -483,8 +483,8 @@ static void TNV_CPU_init(float *InputT, float *uT, int dimX, int dimY, int dimZ) tnv_ctx.dimY = dimY; tnv_ctx.dimZ = dimZ; // Padding seems actually slower - tnv_ctx.padZ = dimZ; -// tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0)); +// tnv_ctx.padZ = dimZ; + tnv_ctx.padZ = 16 * ((dimZ / 16) + ((dimZ % 16)?1:0)); 
hw_sched_init(); diff --git a/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h b/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h index 3ec4250..2605d22 100644 --- a/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h +++ b/src/Core/regularisers_CPU/TNV_core_backtrack_loop.h @@ -3,7 +3,7 @@ l = (j * dimX + i) * padZ; -//#pragma vector aligned +#pragma vector aligned #pragma GCC ivdep for(k = 0; k < dimZ; k++) { u_upd[l + k] = (u[l + k] + tau * div[l + k] + taulambda * Input[l + k]) / constant; @@ -44,7 +44,7 @@ coefF(t, M1, M2, M3, sigma, p, q, r); -//#pragma vector aligned +#pragma vector aligned #pragma GCC ivdep for(k = 0; k < dimZ; k++) { #ifdef TNV_NEW_STYLE diff --git a/src/Core/regularisers_CPU/TNV_core_loop.h b/src/Core/regularisers_CPU/TNV_core_loop.h index 34e7139..3f6d9bc 100644 --- a/src/Core/regularisers_CPU/TNV_core_loop.h +++ b/src/Core/regularisers_CPU/TNV_core_loop.h @@ -17,7 +17,7 @@ // __assume(padZ%16==0); -//#pragma vector aligned +#pragma vector aligned #pragma GCC ivdep for(k = 0; k < dimZ; k++) { float u_upd = (u[l + k] + tau * div[l + k] + taulambda * Input[l + k]) / constant; // 3 reads @@ -59,7 +59,7 @@ coefF(t, M1, M2, M3, sigma, p, q, r); -//#pragma vector aligned +#pragma vector aligned #pragma GCC ivdep for(k = 0; k < padZ; k++) { float vx = ubarx[k] + divsigma * qx_current[k]; // cache 2r -- cgit v1.2.3