From e2601be8f44c09ff21f259d0ce3219bfd5918f12 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 4 Sep 2019 13:31:59 +0100 Subject: gpu module fixed --- src/Core/regularisers_GPU/PatchSelect_GPU_core.cu | 164 ++++++++++++++-------- 1 file changed, 102 insertions(+), 62 deletions(-) diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu index 2cd27ff..fb6fa95 100644 --- a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -40,8 +40,8 @@ */ -#define BLKXSIZE 16 -#define BLKYSIZE 16 +#define BLKXSIZE 8 +#define BLKYSIZE 4 #define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) #define M_PI 3.14159265358979323846 #define EPS 1.0e-8 @@ -68,46 +68,56 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp) __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE5]; unsigned short ind_i[CONSTVECSIZE5]; unsigned short ind_j[CONSTVECSIZE5]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ @@ -133,46 +143,56 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE7]; unsigned short ind_i[CONSTVECSIZE7]; unsigned short ind_j[CONSTVECSIZE7]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ @@ -225,16 +245,16 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { i2 = i1 + i_c; i3 = i + i_c; - //if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { j2 = j1 + j_c; j3 = j + j_c; - //if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - // } /*if j2 j3*/ + } /*if j2 j3*/ } - // } /*if i2 i3*/ + } /*if i2 i3*/ } /* writing temporarily into vectors */ if (normsum > EPS) { @@ -271,46 +291,56 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE11]; unsigned short ind_i[CONSTVECSIZE11]; unsigned short ind_j[CONSTVECSIZE11]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ @@ -335,46 +365,56 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE13]; unsigned short ind_i[CONSTVECSIZE13]; unsigned short ind_j[CONSTVECSIZE13]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ -- cgit v1.2.3