summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Core/regularisers_GPU/LLT_ROF_GPU_core.cu4
-rw-r--r--Core/regularisers_GPU/NonlDiff_GPU_core.cu2
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.cu174
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.cu2
-rwxr-xr-xCore/regularisers_GPU/TV_FGP_GPU_core.cu2
-rwxr-xr-xCore/regularisers_GPU/TV_ROF_GPU_core.cu2
-rwxr-xr-xCore/regularisers_GPU/TV_SB_GPU_core.cu2
-rw-r--r--Core/regularisers_GPU/dTV_FGP_GPU_core.cu2
8 files changed, 95 insertions, 95 deletions
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
index 0228bf0..ac43eb7 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
@@ -44,11 +44,11 @@ limitations under the License.
{ \
const cudaError_t error = call; \
if (error != cudaSuccess) \
- { \
+ { / \
fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
fprintf(stderr, "code: %d, reason: %s\n", error, \
cudaGetErrorString(error)); \
- exit(1); \
+ return; \
} \
}
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
index 8048830..f8176eb 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
@@ -46,7 +46,7 @@ limitations under the License.
fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
fprintf(stderr, "code: %d, reason: %s\n", error, \
cudaGetErrorString(error)); \
- exit(1); \
+ return; \
} \
}
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
index f558b0f..ba84105 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -19,7 +19,7 @@
*/
#include "PatchSelect_GPU_core.h"
-
+
/* CUDA implementation of non-local weight pre-calculation for non-local priors
* Weights and associated indices are stored into pre-allocated arrays and passed
* to the regulariser
@@ -36,32 +36,32 @@
* 1. AR_i - indeces of i neighbours
* 2. AR_j - indeces of j neighbours
* 3. Weights_ij - associated weights
- */
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
-
-#define BLKXSIZE 16
-#define BLKYSIZE 16
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define M_PI 3.14159265358979323846
-#define EPS 1.0e-8
+ */
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+ file, line, (int)err, cudaGetErrorString(err));
+ return;
+ }
+}
+
+#define BLKXSIZE 16
+#define BLKYSIZE 16
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+#define M_PI 3.14159265358979323846
+#define EPS 1.0e-8
#define CONSTVECSIZE5 121
#define CONSTVECSIZE7 225
#define CONSTVECSIZE9 361
#define CONSTVECSIZE11 529
#define CONSTVECSIZE13 729
-
+
__device__ void swap(float *xp, float *yp)
{
float temp = *xp;
@@ -75,9 +75,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp)
*yp = temp;
}
-/********************************************************************************/
-__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+/********************************************************************************/
+__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -85,10 +85,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne
float Weight_Vec[CONSTVECSIZE5];
unsigned short ind_i[CONSTVECSIZE5];
unsigned short ind_j[CONSTVECSIZE5];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -139,10 +139,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
-/********************************************************************************/
-__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+}
+/********************************************************************************/
+__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -150,10 +150,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne
float Weight_Vec[CONSTVECSIZE7];
unsigned short ind_i[CONSTVECSIZE7];
unsigned short ind_j[CONSTVECSIZE7];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -204,9 +204,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
-__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+}
+__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -214,10 +214,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
float Weight_Vec[CONSTVECSIZE9];
unsigned short ind_i[CONSTVECSIZE9];
unsigned short ind_j[CONSTVECSIZE9];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -269,8 +269,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
Weights_d[index2] = Weight_Vec[x];
}
}
-__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -278,10 +278,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign
float Weight_Vec[CONSTVECSIZE11];
unsigned short ind_i[CONSTVECSIZE11];
unsigned short ind_j[CONSTVECSIZE11];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -333,8 +333,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign
Weights_d[index2] = Weight_Vec[x];
}
}
-__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -342,10 +342,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign
float Weight_Vec[CONSTVECSIZE13];
unsigned short ind_i[CONSTVECSIZE13];
unsigned short ind_j[CONSTVECSIZE13];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -398,29 +398,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign
}
}
-
+
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
-{
- int deviceCount = -1; // number of devices
- cudaGetDeviceCount(&deviceCount);
- if (deviceCount == 0) {
- fprintf(stderr, "No CUDA devices found\n");
- return;
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+ int deviceCount = -1; // number of devices
+ cudaGetDeviceCount(&deviceCount);
+ if (deviceCount == 0) {
+ fprintf(stderr, "No CUDA devices found\n");
+ return;
}
-
- int SearchW_full, SimilW_full, counterG, i, j;
+
+ int SearchW_full, SimilW_full, counterG, i, j;
float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
- unsigned short *H_i_d, *H_j_d;
+ unsigned short *H_i_d, *H_j_d;
h2 = h*h;
-
- dim3 dimBlock(BLKXSIZE,BLKYSIZE);
- dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-
- SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
- SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+ dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+
+ SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
+ SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
/* generate a 2D Gaussian kernel for NLM procedure */
Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
@@ -432,16 +432,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho
}} /*main neighb loop */
- /*allocate space on the device*/
- checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+ /*allocate space on the device*/
+ checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );
checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );
checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) );
- checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
-
- /* copy data from the host to the device */
+ checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
+
+ /* copy data from the host to the device */
checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) );
- checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+ checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
/********************** Run CUDA kernel here ********************/
if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
@@ -450,19 +450,19 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho
else if (SearchWindow == 11) IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
else if (SearchWindow == 13) IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
else {
- fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+ fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
return;}
- checkCudaErrors(cudaPeekAtLastError() );
- checkCudaErrors(cudaDeviceSynchronize());
- /***************************************************************/
-
+ checkCudaErrors(cudaPeekAtLastError() );
+ checkCudaErrors(cudaDeviceSynchronize());
+ /***************************************************************/
+
checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );
-
+
cudaFree(Ad);
cudaFree(H_i_d);
cudaFree(H_j_d);
cudaFree(Weights_d);
- cudaFree(Eucl_Vec_d);
+ cudaFree(Eucl_Vec_d);
}
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 3081011..09a4ec5 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -45,7 +45,7 @@ limitations under the License.
fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
fprintf(stderr, "code: %d, reason: %s\n", error, \
cudaGetErrorString(error)); \
- exit(1); \
+ return; \
} \
}
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index eab7a44..7466135 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -48,7 +48,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line)
{
fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
+ return;
}
}
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 57de63d..5ae3b6e 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -44,7 +44,7 @@ limitations under the License.
fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
fprintf(stderr, "code: %d, reason: %s\n", error, \
cudaGetErrorString(error)); \
- exit(1); \
+ return; \
} \
}
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu
index 68b9221..a97851c 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -47,7 +47,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line)
{
fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
+ return;
}
}
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index 80a78da..6040648 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -54,7 +54,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line)
{
fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
+ return;
}
}