diff options
28 files changed, 338 insertions, 330 deletions
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu index fd586ef..a4dbe70 100644 --- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu +++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "Diffus_4thO_GPU_core.h" +#include "shared.h" /* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case) * The minimisation is performed using explicit scheme. @@ -36,18 +37,6 @@ limitations under the License. * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191. */ -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} - #define BLKXSIZE 8 #define BLKYSIZE 8 #define BLKZSIZE 8 @@ -228,7 +217,7 @@ __global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, floa /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z) +extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z) { int dimTotal, dev = 0; CHECK(cudaSetDevice(dev)); @@ -242,7 +231,7 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, CHECK(cudaMalloc((void**)&d_W_Lapl,dimTotal*sizeof(float))); CHECK(cudaMemcpy(d_input,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); if (Z == 1) { /*2D case */ @@ -275,4 +264,5 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, CHECK(cudaFree(d_input)); CHECK(cudaFree(d_output)); CHECK(cudaFree(d_W_Lapl)); + return 0; } diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h index 6314c1f..77d5d79 100644 --- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h +++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu index 0228bf0..87871be 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "LLT_ROF_GPU_core.h" +#include "shared.h" /* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty. * @@ -40,18 +41,6 @@ limitations under the License. * [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" */ -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} - #define BLKXSIZE 8 #define BLKYSIZE 8 #define BLKZSIZE 8 @@ -403,7 +392,7 @@ __global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, floa /************************ HOST FUNCTION ****************************/ /*******************************************************************/ -extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z) +extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z) { // set up device int dev = 0; @@ -480,4 +469,5 @@ extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, f CHECK(cudaFree(D1_ROF)); CHECK(cudaFree(D2_ROF)); CHECK(cudaFree(D3_ROF)); + return 0; } diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h index 4a19d09..a6bfcc7 100644 --- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h +++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu index 8048830..ff7ce4d 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "NonlDiff_GPU_core.h" +#include "shared.h" /* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case) * The minimisation is performed using explicit scheme. @@ -38,18 +39,7 @@ limitations under the License. * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432. */ -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} - + #define BLKXSIZE 8 #define BLKYSIZE 8 #define BLKZSIZE 8 @@ -295,7 +285,7 @@ __global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambda ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z) +extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z) { // set up device int dev = 0; @@ -350,5 +340,6 @@ extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); CHECK(cudaFree(d_input)); CHECK(cudaFree(d_output)); - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h index afd712b..5fe457e 100644 --- a/Core/regularisers_GPU/NonlDiff_GPU_core.h +++ b/Core/regularisers_GPU/NonlDiff_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); +extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu index f558b0f..d173124 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -19,7 +19,8 @@ */ #include "PatchSelect_GPU_core.h" -
+#include "shared.h" + /* CUDA implementation of non-local weight pre-calculation for non-local priors * Weights and associated indices are stored into pre-allocated arrays and passed * to the regulariser @@ -36,32 +37,20 @@ * 1. AR_i - indeces of i neighbours * 2. AR_j - indeces of j neighbours * 3. Weights_ij - associated weights - */
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
-
-#define BLKXSIZE 16
-#define BLKYSIZE 16
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define M_PI 3.14159265358979323846
-#define EPS 1.0e-8
+ */ + + +#define BLKXSIZE 16 +#define BLKYSIZE 16 +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define M_PI 3.14159265358979323846 +#define EPS 1.0e-8 #define CONSTVECSIZE5 121 #define CONSTVECSIZE7 225 #define CONSTVECSIZE9 361 #define CONSTVECSIZE11 529 #define CONSTVECSIZE13 729 -
+ __device__ void swap(float *xp, float *yp) { float temp = *xp; @@ -75,9 +64,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp) *yp = temp; } -/********************************************************************************/
-__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+/********************************************************************************/ +__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -85,10 +74,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE5]; unsigned short ind_i[CONSTVECSIZE5]; unsigned short ind_j[CONSTVECSIZE5]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -139,10 +128,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -}
-/********************************************************************************/
-__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+} +/********************************************************************************/ +__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -150,10 +139,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE7]; unsigned short ind_i[CONSTVECSIZE7]; unsigned short ind_j[CONSTVECSIZE7]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -204,9 +193,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -}
-__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+} +__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -214,10 +203,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne float Weight_Vec[CONSTVECSIZE9]; unsigned short ind_i[CONSTVECSIZE9]; unsigned short ind_j[CONSTVECSIZE9]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -269,8 +258,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne Weights_d[index2] = Weight_Vec[x]; } } -__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -278,10 +267,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign float Weight_Vec[CONSTVECSIZE11]; unsigned short ind_i[CONSTVECSIZE11]; unsigned short ind_j[CONSTVECSIZE11]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -333,8 +322,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign Weights_d[index2] = Weight_Vec[x]; } } -__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; @@ -342,10 +331,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign float Weight_Vec[CONSTVECSIZE13]; unsigned short ind_i[CONSTVECSIZE13]; unsigned short ind_j[CONSTVECSIZE13]; -
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + long index = i*M+j; counter = 0; @@ -398,29 +387,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign } } -
+ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
-{
- int deviceCount = -1; // number of devices
- cudaGetDeviceCount(&deviceCount);
- if (deviceCount == 0) {
- fprintf(stderr, "No CUDA devices found\n");
- return;
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return -1; } -
- int SearchW_full, SimilW_full, counterG, i, j;
+ + int SearchW_full, SimilW_full, counterG, i, j; float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d; - unsigned short *H_i_d, *H_j_d;
+ unsigned short *H_i_d, *H_j_d; h2 = h*h; -
- dim3 dimBlock(BLKXSIZE,BLKYSIZE);
- dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-
- SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
- SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+ + dim3 dimBlock(BLKXSIZE,BLKYSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); + + SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */ + SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */ /* generate a 2D Gaussian kernel for NLM procedure */ Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); @@ -432,16 +421,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho }} /*main neighb loop */ - /*allocate space on the device*/
- checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+ /*allocate space on the device*/ + checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) ); checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) ); checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) ); checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) ); - checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
-
- /* copy data from the host to the device */
+ checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) ); + + /* copy data from the host to the device */ checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+ checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); /********************** Run CUDA kernel here ********************/ if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); @@ -450,19 +439,20 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho else if (SearchWindow == 11) IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else if (SearchWindow == 13) IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else { - fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
- return;} - checkCudaErrors(cudaPeekAtLastError() );
- checkCudaErrors(cudaDeviceSynchronize());
- /***************************************************************/
-
+ fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n"); + return -1;} + checkCudaErrors(cudaPeekAtLastError() ); + checkCudaErrors(cudaDeviceSynchronize()); + /***************************************************************/ + checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) ); -
+ cudaFree(Ad); cudaFree(H_i_d); cudaFree(H_j_d); cudaFree(Weights_d); - cudaFree(Eucl_Vec_d);
+ cudaFree(Eucl_Vec_d); + return 0; } diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h index d20fe9f..8c124d3 100644 --- a/Core/regularisers_GPU/PatchSelect_GPU_core.h +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); +extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); #endif diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 3081011..73232a9 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "TGV_GPU_core.h" +#include "shared.h" /* CUDA implementation of Primal-Dual denoising method for * Total Generilized Variation (TGV)-L2 model [1] (2D case only) @@ -36,19 +37,6 @@ limitations under the License. * References: * [1] K. Bredies "Total Generalized Variation" */ - -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} - #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -239,7 +227,7 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total) /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY) +extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY) { int dimTotal, dev = 0; CHECK(cudaSetDevice(dev)); @@ -320,4 +308,5 @@ extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, fl CHECK(cudaFree(V2)); CHECK(cudaFree(V1_old)); CHECK(cudaFree(V2_old)); + return 0; } diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h index 663378f..5a4eb76 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.h +++ b/Core/regularisers_GPU/TGV_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); #endif diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index eab7a44..b371c5d 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "TV_FGP_GPU_core.h" +#include "shared.h" #include <thrust/device_vector.h> #include <thrust/transform_reduce.h> @@ -39,18 +40,6 @@ limitations under the License. * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -354,13 +343,13 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { fprintf(stderr, "No CUDA devices found\n"); - return; + return -1; } int count = 0, i; @@ -570,5 +559,6 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in cudaFree(R2); cudaFree(R3); } - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h index 107d243..b28cdf3 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.h +++ b/Core/regularisers_GPU/TV_FGP_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _TV_FGP_GPU_ #define _TV_FGP_GPU_ -extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 57de63d..76f5be9 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -35,18 +35,7 @@ limitations under the License. * * D. Kazantsev, 2016-18 */ - -#define CHECK(call) \ -{ \ - const cudaError_t error = call; \ - if (error != cudaSuccess) \ - { \ - fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ - fprintf(stderr, "code: %d, reason: %s\n", error, \ - cudaGetErrorString(error)); \ - exit(1); \ - } \ -} +#include "shared.h" #define BLKXSIZE 8 #define BLKYSIZE 8 @@ -304,7 +293,7 @@ __host__ __device__ int sign (float x) ///////////////////////////////////////////////// // HOST FUNCTION -extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) +extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z) { // set up device int dev = 0; @@ -364,5 +353,6 @@ extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, in CHECK(cudaFree(d_update)); CHECK(cudaFree(d_D1)); CHECK(cudaFree(d_D2)); - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h index d772aba..3a09296 100755 --- a/Core/regularisers_GPU/TV_ROF_GPU_core.h +++ b/Core/regularisers_GPU/TV_ROF_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include <stdio.h> -extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); #endif diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu index 68b9221..1f494ee 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.cu +++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu @@ -18,6 +18,7 @@ limitations under the License. */ #include "TV_SB_GPU_core.h" +#include "shared.h" #include <thrust/device_vector.h> #include <thrust/transform_reduce.h> @@ -39,17 +40,6 @@ limitations under the License. */ // This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -363,13 +353,13 @@ __global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ -extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) +extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { fprintf(stderr, "No CUDA devices found\n"); - return; + return -1; } int ll, DimTotal; @@ -557,5 +547,6 @@ extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, cudaFree(By); cudaFree(Bz); } - //cudaDeviceReset(); + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h index bdc9219..d44ab77 100755 --- a/Core/regularisers_GPU/TV_SB_GPU_core.h +++ b/Core/regularisers_GPU/TV_SB_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _SB_TV_GPU_ #define _SB_TV_GPU_ -extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); +extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 80a78da..7503ec7 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - +#include "shared.h" #include "dTV_FGP_GPU_core.h" #include <thrust/device_vector.h> #include <thrust/transform_reduce.h> @@ -45,19 +45,6 @@ limitations under the License. */ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) - -inline void __checkCudaErrors(cudaError err, const char *file, const int line) -{ - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", - file, line, (int)err, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} - #define BLKXSIZE2D 16 #define BLKYSIZE2D 16 @@ -468,13 +455,13 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ ////////////MAIN HOST FUNCTION /////////////// -extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { fprintf(stderr, "No CUDA devices found\n"); - return; + return -1; } int count = 0, i; @@ -748,6 +735,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f cudaFree(InputRef_y); cudaFree(InputRef_z); cudaFree(d_InputRef); - } - //cudaDeviceReset(); + } + //cudaDeviceReset(); + return 0; } diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h index b906636..9020b1a 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h @@ -5,6 +5,6 @@ #ifndef _dTV_FGP_GPU_ #define _dTV_FGP_GPU_ -extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #endif diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h new file mode 100644 index 0000000..fe98cd6 --- /dev/null +++ b/Core/regularisers_GPU/shared.h @@ -0,0 +1,42 @@ +/*shared macros*/ + + +/*checks CUDA call, should be used in functions returning <int> value +if error happens, writes to standard error and explicitly returns -1*/ +#define CHECK(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + return -1; \ + } \ +} + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(call) \ +{ \ + const cudaError_t error = call; \ + if (error != cudaSuccess) \ + { \ + fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \ + fprintf(stderr, "code: %d, reason: %s\n", error, \ + cudaGetErrorString(error)); \ + return -1; \ + } \ +} +/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + return; + } +} +*/ + @@ -69,7 +69,6 @@ Here an example of build on Linux (see also `run.sh` for additional info): ```bash git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit.git -mkdir build cd build cmake .. -DCONDA_BUILD=OFF -DBUILD_MATLAB_WRAPPER=ON -DBUILD_PYTHON_WRAPPER=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install make install @@ -88,7 +87,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge #### Python (conda-build) ``` - export CIL_VERSION=0.10.2 + export CIL_VERSION=0.10.3 conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5 conda install ccpi-regulariser=${CIL_VERSION} --use-local --force cd demos/ @@ -124,7 +123,7 @@ On Windows the `dll` and the mex modules must reside in the same directory. It i addpath(/path/to/library); ``` -#### Legacy Matlab installation +#### Legacy Matlab installation (partly supported, please use Cmake) ``` cd /Wrappers/Matlab/mex_compile diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m index e0311ea..dd1475c 100644 --- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m +++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m @@ -7,11 +7,10 @@ % In the code bellow we provide a full explicit path to nvcc compiler % ! paths to matlab and CUDA sdk can be different, modify accordingly ! -% Tested on Ubuntu 16.04/MATLAB 2016b/cuda7.5/gcc4.9 - -% Installation HAS NOT been tested on Windows, please contact me if you'll be able to -% install software on Windows and I gratefully include it into the master release. +% Tested on Ubuntu 18.04/MATLAB 2016b/cuda10.0/gcc7.3 +% Installation HAS NOT been tested on Windows, please you Cmake build or +% modify the code bellow accordingly fsep = '/'; pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_GPU'], 1i); @@ -28,44 +27,45 @@ fprintf('%s \n', '<<<<<<<<<<<Compiling GPU regularisers (CUDA)>>>>>>>>>>>>>'); fprintf('%s \n', 'Compiling ROF-TV...'); !/usr/local/cuda/bin/nvcc -O0 -c TV_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o movefile('ROF_TV_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling FGP-TV...'); !/usr/local/cuda/bin/nvcc -O0 -c TV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o movefile('FGP_TV_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling SB-TV...'); !/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o movefile('SB_TV_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling TGV...'); !/usr/local/cuda/bin/nvcc -O0 -c TGV_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o movefile('TGV_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling dFGP-TV...'); !/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o movefile('FGP_dTV_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling NonLinear Diffusion...'); !/usr/local/cuda/bin/nvcc -O0 -c NonlDiff_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o movefile('NonlDiff_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...'); !/usr/local/cuda/bin/nvcc -O0 -c Diffus_4thO_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o movefile('Diffusion_4thO_GPU.mex*',Pathmove); fprintf('%s \n', 'Compiling ROF-LLT...'); !/usr/local/cuda/bin/nvcc -O0 -c LLT_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ -mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o +mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o movefile('LLT_ROF_GPU.mex*',Pathmove); + delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* LLT_ROF_GPU_core* CCPiDefines.h fprintf('%s \n', 'All successfully compiled!'); diff --git a/Wrappers/Python/conda-recipe/build.sh b/Wrappers/Python/conda-recipe/build.sh index 54bc8e2..eec7c2f 100644 --- a/Wrappers/Python/conda-recipe/build.sh +++ b/Wrappers/Python/conda-recipe/build.sh @@ -4,7 +4,7 @@ cp -rv "$RECIPE_DIR/../.." "$SRC_DIR/ccpi" cp -rv "$RECIPE_DIR/../../../Core" "$SRC_DIR/Core" cd $SRC_DIR - +##cuda=off cmake -G "Unix Makefiles" $RECIPE_DIR/../../../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX diff --git a/Wrappers/Python/conda-recipe/meta.yaml b/Wrappers/Python/conda-recipe/meta.yaml index ed73165..808493e 100644 --- a/Wrappers/Python/conda-recipe/meta.yaml +++ b/Wrappers/Python/conda-recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: ccpi-regulariser - version: 0.10.2 + version: 0.10.3 build: diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py index 499ae7f..cfb3f53 100755 --- a/Wrappers/Python/conda-recipe/run_test.py +++ b/Wrappers/Python/conda-recipe/run_test.py @@ -2,7 +2,7 @@ import unittest import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
from PIL import Image
class TiffReader(object):
@@ -37,6 +37,8 @@ class TestRegularisers(unittest.TestCase): def test_ROF_TV_CPU_vs_GPU(self):
+ #print ("tomas debug test function")
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -63,11 +65,11 @@ class TestRegularisers(unittest.TestCase): # set parameters
pars = {'algorithm': ROF_TV, \
- 'input' : u0,\
- 'regularisation_parameter':0.04,\
- 'number_of_iterations': 1000,\
- 'time_marching_parameter': 0.0001
- }
+ 'input' : u0,\
+ 'regularisation_parameter':0.04,\
+ 'number_of_iterations': 2500,\
+ 'time_marching_parameter': 0.00002
+ }
print ("#############ROF TV CPU####################")
start_time = timeit.default_timer()
rof_cpu = ROF_TV(pars['input'],
@@ -88,8 +90,8 @@ class TestRegularisers(unittest.TestCase): pars['number_of_iterations'],
pars['time_marching_parameter'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, rof_gpu)
pars['rmse'] = rms
pars['algorithm'] = ROF_TV
@@ -101,10 +103,10 @@ class TestRegularisers(unittest.TestCase): diff_im = np.zeros(np.shape(rof_cpu))
diff_im = abs(rof_cpu - rof_gpu)
diff_im[diff_im > tolerance] = 1
-
self.assertLessEqual(diff_im.sum() , 1)
def test_FGP_TV_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -169,10 +171,10 @@ class TestRegularisers(unittest.TestCase): pars['methodTV'],
pars['nonneg'],
pars['printingOut'],'gpu')
-
+
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, fgp_gpu)
pars['rmse'] = rms
pars['algorithm'] = FGP_TV
@@ -189,6 +191,7 @@ class TestRegularisers(unittest.TestCase): self.assertLessEqual(diff_im.sum() , 1)
def test_SB_TV_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -251,10 +254,10 @@ class TestRegularisers(unittest.TestCase): pars['tolerance_constant'],
pars['methodTV'],
pars['printingOut'],'gpu')
-
+
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, sb_gpu)
pars['rmse'] = rms
pars['algorithm'] = SB_TV
@@ -269,6 +272,7 @@ class TestRegularisers(unittest.TestCase): self.assertLessEqual(diff_im.sum(), 1)
def test_TGV_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -329,10 +333,10 @@ class TestRegularisers(unittest.TestCase): pars['alpha0'],
pars['number_of_iterations'],
pars['LipshitzConstant'],'gpu')
-
+
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, tgv_gpu)
pars['rmse'] = rms
pars['algorithm'] = TGV
@@ -347,6 +351,7 @@ class TestRegularisers(unittest.TestCase): self.assertLessEqual(diff_im.sum() , 1)
def test_LLT_ROF_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -405,8 +410,8 @@ class TestRegularisers(unittest.TestCase): pars['time_marching_parameter'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, lltrof_gpu)
pars['rmse'] = rms
pars['algorithm'] = LLT_ROF
@@ -421,6 +426,7 @@ class TestRegularisers(unittest.TestCase): self.assertLessEqual(diff_im.sum(), 1)
def test_NDF_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -483,8 +489,7 @@ class TestRegularisers(unittest.TestCase): pars['penalty_type'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, ndf_gpu)
pars['rmse'] = rms
pars['algorithm'] = NDF
@@ -557,8 +562,7 @@ class TestRegularisers(unittest.TestCase): pars['time_marching_parameter'], 'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, diff4th_gpu)
pars['rmse'] = rms
pars['algorithm'] = DIFF4th
@@ -603,8 +607,8 @@ class TestRegularisers(unittest.TestCase): 'input' : u0,\
'refdata' : u_ref,\
'regularisation_parameter':0.04, \
- 'number_of_iterations' :2000 ,\
- 'tolerance_constant':1e-06,\
+ 'number_of_iterations' :1000 ,\
+ 'tolerance_constant':1e-07,\
'eta_const':0.2,\
'methodTV': 0 ,\
'nonneg': 0 ,\
@@ -643,8 +647,7 @@ class TestRegularisers(unittest.TestCase): pars['nonneg'],
pars['printingOut'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, fgp_dtv_gpu)
pars['rmse'] = rms
pars['algorithm'] = FGP_dTV
@@ -765,8 +768,8 @@ class TestRegularisers(unittest.TestCase): pars_rof_tv['number_of_iterations'],
pars_rof_tv['time_marching_parameter'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms_rof = rmse(Im, rof_gpu)
# now compare obtained rms with the expected value
self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
@@ -806,10 +809,10 @@ class TestRegularisers(unittest.TestCase): pars_fgp_tv['nonneg'],
pars_fgp_tv['printingOut'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms_fgp = rmse(Im, fgp_gpu)
# now compare obtained rms with the expected value
+
self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
if __name__ == '__main__':
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index 616eab0..6529b5c 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -656,8 +656,8 @@ pars = {'algorithm' : FGP_dTV, \ 'input' : u0,\ 'refdata' : u_ref,\ 'regularisation_parameter':0.04, \ - 'number_of_iterations' :2000 ,\ - 'tolerance_constant':1e-06,\ + 'number_of_iterations' :1000 ,\ + 'tolerance_constant':1e-07,\ 'eta_const':0.2,\ 'methodTV': 0 ,\ 'nonneg': 0 ,\ diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 302727e..2b97865 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -18,15 +18,17 @@ import cython import numpy as np cimport numpy as np -cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); -cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); -cdef extern void TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z); -cdef extern void TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); -cdef extern void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); -cdef extern void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); -cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); -cdef extern void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); -cdef extern void PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); +CUDAErrorMessage = 'CUDA error' + +cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); +cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z); +cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY); +cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z); +cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); +cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); +cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); # Total-variation Rudin-Osher-Fatemi (ROF) def TV_ROF_GPU(inputData, @@ -186,15 +188,16 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') - # Running CUDA code here - TV_ROF_GPU_main( + # Running CUDA code here + if (TV_ROF_GPU_main( &inputData[0,0], &outputData[0,0], regularisation_parameter, iterations , time_marching_parameter, - dims[1], dims[0], 1); - - return outputData + dims[1], dims[0], 1)==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularisation_parameter, @@ -210,14 +213,15 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_ROF_GPU_main( + if (TV_ROF_GPU_main( &inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, iterations , time_marching_parameter, - dims[2], dims[1], dims[0]); - - return outputData + dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); #****************************************************************# #********************** Total-variation FGP *********************# #****************************************************************# @@ -238,16 +242,18 @@ def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0], + if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, iterations, tolerance_param, methodTV, nonneg, printM, - dims[1], dims[0], 1); - - return outputData + dims[1], dims[0], 1)==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularisation_parameter, @@ -266,16 +272,18 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], + if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter , iterations, tolerance_param, methodTV, nonneg, printM, - dims[2], dims[1], dims[0]); - - return outputData + dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + #***************************************************************# #********************** Total-variation SB *********************# #***************************************************************# @@ -295,15 +303,17 @@ def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - TV_SB_GPU_main(&inputData[0,0], &outputData[0,0], + if (TV_SB_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, iterations, tolerance_param, methodTV, printM, - dims[1], dims[0], 1); - - return outputData + dims[1], dims[0], 1)==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularisation_parameter, @@ -321,15 +331,17 @@ def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0], + if (TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter , iterations, tolerance_param, methodTV, printM, - dims[2], dims[1], dims[0]); - - return outputData + dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + #***************************************************************# #************************ LLT-ROF model ************************# @@ -349,8 +361,11 @@ def LLT_ROF_GPU2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1); - return outputData + if (LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1)==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularisation_parameterROF, @@ -367,8 +382,11 @@ def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0]); - return outputData + if (LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + #***************************************************************# @@ -389,13 +407,16 @@ def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') #/* Run TGV iterations for 2D data */ - TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, + if (TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, alpha1, alpha0, iterationsNumb, LipshitzConst, - dims[1],dims[0]) - return outputData + dims[1],dims[0])==0): + return outputData + else: + raise ValueError(CUDAErrorMessage); + #****************************************************************# #**************Directional Total-variation FGP ******************# @@ -419,7 +440,7 @@ def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Running CUDA code here - dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], + if (dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter, iterations, tolerance_param, @@ -427,9 +448,11 @@ def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, methodTV, nonneg, printM, - dims[1], dims[0], 1); - - return outputData + dims[1], dims[0], 1)==0): + return outputData + else: + raise ValueError(CUDAErrorMessage); + def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, @@ -450,7 +473,7 @@ def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], + if (dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], regularisation_parameter , iterations, tolerance_param, @@ -458,8 +481,11 @@ def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, methodTV, nonneg, printM, - dims[2], dims[1], dims[0]); - return outputData + dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + #****************************************************************# #***************Nonlinear (Isotropic) Diffusion******************# @@ -483,8 +509,11 @@ def NDF_GPU_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, # Run Nonlinear Diffusion iterations for 2D data # Running CUDA code here - NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1) - return outputData + if (NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); + def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularisation_parameter, @@ -502,9 +531,11 @@ def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, # Run Nonlinear Diffusion iterations for 3D data # Running CUDA code here - NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0]) + if (NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); - return outputData #****************************************************************# #************Anisotropic Fourth-Order diffusion******************# #****************************************************************# @@ -522,8 +553,11 @@ def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, # Run Anisotropic Fourth-Order diffusion for 2D data # Running CUDA code here - Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1) - return outputData + if (Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)==0): + return outputData + else: + raise ValueError(CUDAErrorMessage); + def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, float regularisation_parameter, @@ -540,9 +574,11 @@ def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, # Run Anisotropic Fourth-Order diffusion for 3D data # Running CUDA code here - Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0]) + if (Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])==0): + return outputData; + else: + raise ValueError(CUDAErrorMessage); - return outputData #****************************************************************# #************Patch-based weights pre-selection******************# #****************************************************************# @@ -571,6 +607,8 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0], dims[1],dims[2]], dtype='uint16') # Run patch-based weight selection function - PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow, neighbours, edge_parameter) - - return H_i, H_j, Weights + if (PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow, neighbours, edge_parameter)==0): + return H_i, H_j, Weights; + else: + raise ValueError(CUDAErrorMessage); + diff --git a/build/jenkins-build.sh b/build/jenkins-build.sh index 04f8da6..0c397b1 100755 --- a/build/jenkins-build.sh +++ b/build/jenkins-build.sh @@ -1,12 +1,29 @@ #!/usr/bin/env bash # Script to builds source code in Jenkins environment +module try-load conda -module avail -module load conda -# it expects that git clone is done before this script launch +# install miniconda if the module is not present +if hash conda 2>/dev/null; then + echo using conda +else + if [ ! -f Miniconda3-latest-Linux-x86_64.sh ]; then + wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh + chmod +x Miniconda3-latest-Linux-x86_64.sh + fi + ./Miniconda3-latest-Linux-x86_64.sh -u -b -p . + PATH=$PATH:./bin +fi + +# presume that git clone is done before this script launch # git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit -conda install conda-build +conda install -y conda-build #export CIL_VERSION=0.10.2 -export CIL_VERSION=0.10.2 -cd CCPi-Regularisation-Toolkit +if [[ -n ${CIL_VERSION} ]] +then + echo Using defined version: $CIL_VERSION +else + export CIL_VERSION=0.10.3 + echo Defining version: $CIL_VERSION +fi +#cd CCPi-Regularisation-Toolkit # already there by jenkins conda build Wrappers/Python/conda-recipe @@ -3,7 +3,7 @@ echo "Building CCPi-regularisation Toolkit using CMake" # rm -r build # Requires Cython, install it first: # pip install cython -mkdir build +# mkdir build cd build/ make clean # install Python modules only without CUDA |