diff options
-rw-r--r-- | Core/regularisers_GPU/LLT_ROF_GPU_core.cu | 4 | ||||
-rw-r--r-- | Core/regularisers_GPU/NonlDiff_GPU_core.cu | 2 | ||||
-rw-r--r-- | Core/regularisers_GPU/PatchSelect_GPU_core.cu | 174 | ||||
-rw-r--r-- | Core/regularisers_GPU/TGV_GPU_core.cu | 2 | ||||
-rwxr-xr-x | Core/regularisers_GPU/TV_FGP_GPU_core.cu | 2 | ||||
-rwxr-xr-x | Core/regularisers_GPU/TV_ROF_GPU_core.cu | 2 | ||||
-rwxr-xr-x | Core/regularisers_GPU/TV_SB_GPU_core.cu | 2 | ||||
-rw-r--r-- | Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 2 |
8 files changed, 95 insertions, 95 deletions
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu index 0228bf0..ac43eb7 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu @@ -44,11 +44,11 @@ limitations under the License. { \ const cudaError_t error = call; \ if (error != cudaSuccess) \ - { \ + { / \ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu index 8048830..f8176eb 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu @@ -46,7 +46,7 @@ limitations under the License. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu index f558b0f..ba84105 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -19,7 +19,7 @@ */ #include "PatchSelect_GPU_core.h" -
+ /* CUDA implementation of non-local weight pre-calculation for non-local priors * Weights and associated indices are stored into pre-allocated arrays and passed * to the regulariser @@ -36,32 +36,32 @@ * 1. AR_i - indeces of i neighbours * 2. AR_j - indeces of j neighbours * 3. Weights_ij - associated weights - */
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
-
-#define BLKXSIZE 16
-#define BLKYSIZE 16
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define M_PI 3.14159265358979323846
-#define EPS 1.0e-8
+ */ + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + return; + } +} + +#define BLKXSIZE 16 +#define BLKYSIZE 16 +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define M_PI 3.14159265358979323846 +#define EPS 1.0e-8 #define CONSTVECSIZE5 121 #define CONSTVECSIZE7 225 #define CONSTVECSIZE9 361 #define CONSTVECSIZE11 529 #define CONSTVECSIZE13 729 -
+ __device__ void swap(float *xp, float *yp) { float temp = *xp; @@ -75,9 +75,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp) *yp = temp; } -/********************************************************************************/
-__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+/********************************************************************************/ +__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -85,10 +85,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE5]; unsigned short ind_i[CONSTVECSIZE5]; unsigned short ind_j[CONSTVECSIZE5]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -139,10 +139,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -}
-/********************************************************************************/
-__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+} +/********************************************************************************/ +__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -150,10 +150,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE7]; unsigned short ind_i[CONSTVECSIZE7]; unsigned short ind_j[CONSTVECSIZE7]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -204,9 +204,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -}
-__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+} +__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -214,10 +214,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE9]; unsigned short ind_i[CONSTVECSIZE9]; unsigned short ind_j[CONSTVECSIZE9]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -269,8 +269,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne Weights_d[index2] = Weight_Vec[x]; } } -__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -278,10 +278,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign float Weight_Vec[CONSTVECSIZE11]; unsigned short ind_i[CONSTVECSIZE11]; unsigned short ind_j[CONSTVECSIZE11]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -333,8 +333,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign Weights_d[index2] = Weight_Vec[x]; } } -__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -342,10 +342,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign float Weight_Vec[CONSTVECSIZE13]; unsigned short ind_i[CONSTVECSIZE13]; unsigned short ind_j[CONSTVECSIZE13]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -398,29 +398,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign } } -
+ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
-{
- int deviceCount = -1; // number of devices
- cudaGetDeviceCount(&deviceCount);
- if (deviceCount == 0) {
- fprintf(stderr, "No CUDA devices found\n");
- return;
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; } -
- int SearchW_full, SimilW_full, counterG, i, j;
+ + int SearchW_full, SimilW_full, counterG, i, j; float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d; - unsigned short *H_i_d, *H_j_d;
+ unsigned short *H_i_d, *H_j_d; h2 = h*h; -
- dim3 dimBlock(BLKXSIZE,BLKYSIZE);
- dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-
- SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
- SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+ + dim3 dimBlock(BLKXSIZE,BLKYSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); + + SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */ + SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */ /* generate a 2D Gaussian kernel for NLM procedure */ Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); @@ -432,16 +432,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho }} /*main neighb loop */ - /*allocate space on the device*/
- checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+ /*allocate space on the device*/ + checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) ); checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) ); checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) ); checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
-
- /* copy data from the host to the device */
+ checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) ); + + /* copy data from the host to the device */ checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+ checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); /********************** Run CUDA kernel here ********************/ if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); @@ -450,19 +450,19 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho else if (SearchWindow == 11) IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else if (SearchWindow == 13) IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else { - fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+ fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n"); return;} - checkCudaErrors(cudaPeekAtLastError() );
- checkCudaErrors(cudaDeviceSynchronize());
- /***************************************************************/
-
+ checkCudaErrors(cudaPeekAtLastError() ); + checkCudaErrors(cudaDeviceSynchronize()); + /***************************************************************/ + checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) ); -
+ cudaFree(Ad); cudaFree(H_i_d); cudaFree(H_j_d); cudaFree(Weights_d); - cudaFree(Eucl_Vec_d);
+ cudaFree(Eucl_Vec_d); } diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 3081011..09a4ec5 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -45,7 +45,7 @@ limitations under the License. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index eab7a44..7466135 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -48,7 +48,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line) { fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + return; } } diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 57de63d..5ae3b6e 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -44,7 +44,7 @@ limitations under the License. fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ fprintf(stderr, "code: %d, reason: %s\n", error, \ cudaGetErrorString(error)); \ - exit(1); \ + return; \ } \ } diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu index 68b9221..a97851c 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.cu +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -47,7 +47,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line) { fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + return; } } diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 80a78da..6040648 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -54,7 +54,7 @@ inline void __checkCudaErrors(cudaError err, const char *file, const int line) { fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); + return; } } |