summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Core/regularisers_GPU/Diffus_4thO_GPU_core.cu18
-rw-r--r--Core/regularisers_GPU/Diffus_4thO_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/LLT_ROF_GPU_core.cu16
-rw-r--r--Core/regularisers_GPU/LLT_ROF_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/NonlDiff_GPU_core.cu19
-rw-r--r--Core/regularisers_GPU/NonlDiff_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.cu166
-rw-r--r--Core/regularisers_GPU/PatchSelect_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.cu17
-rw-r--r--Core/regularisers_GPU/TGV_GPU_core.h2
-rwxr-xr-xCore/regularisers_GPU/TV_FGP_GPU_core.cu20
-rwxr-xr-xCore/regularisers_GPU/TV_FGP_GPU_core.h2
-rwxr-xr-xCore/regularisers_GPU/TV_ROF_GPU_core.cu18
-rwxr-xr-xCore/regularisers_GPU/TV_ROF_GPU_core.h2
-rwxr-xr-xCore/regularisers_GPU/TV_SB_GPU_core.cu19
-rwxr-xr-xCore/regularisers_GPU/TV_SB_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/dTV_FGP_GPU_core.cu24
-rw-r--r--Core/regularisers_GPU/dTV_FGP_GPU_core.h2
-rw-r--r--Core/regularisers_GPU/shared.h42
-rw-r--r--Readme.md5
-rw-r--r--Wrappers/Matlab/mex_compile/compileGPU_mex.m24
-rw-r--r--Wrappers/Python/conda-recipe/build.sh2
-rw-r--r--Wrappers/Python/conda-recipe/meta.yaml2
-rwxr-xr-xWrappers/Python/conda-recipe/run_test.py67
-rw-r--r--Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py4
-rw-r--r--Wrappers/Python/src/gpu_regularisers.pyx156
-rwxr-xr-xbuild/jenkins-build.sh29
-rw-r--r--run.sh2
28 files changed, 338 insertions, 330 deletions
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
index fd586ef..a4dbe70 100644
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
+++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "Diffus_4thO_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
* The minimisation is performed using explicit scheme.
@@ -36,18 +37,6 @@ limitations under the License.
* [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
*/
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- exit(1); \
- } \
-}
-
#define BLKXSIZE 8
#define BLKYSIZE 8
#define BLKZSIZE 8
@@ -228,7 +217,7 @@ __global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, floa
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
+extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
{
int dimTotal, dev = 0;
CHECK(cudaSetDevice(dev));
@@ -242,7 +231,7 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar,
CHECK(cudaMalloc((void**)&d_W_Lapl,dimTotal*sizeof(float)));
CHECK(cudaMemcpy(d_input,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
- CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_output,Input,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
if (Z == 1) {
/*2D case */
@@ -275,4 +264,5 @@ extern "C" void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar,
CHECK(cudaFree(d_input));
CHECK(cudaFree(d_output));
CHECK(cudaFree(d_W_Lapl));
+ return 0;
}
diff --git a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
index 6314c1f..77d5d79 100644
--- a/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
+++ b/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
index 0228bf0..87871be 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "LLT_ROF_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
*
@@ -40,18 +41,6 @@ limitations under the License.
* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
*/
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- exit(1); \
- } \
-}
-
#define BLKXSIZE 8
#define BLKYSIZE 8
#define BLKZSIZE 8
@@ -403,7 +392,7 @@ __global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, floa
/************************ HOST FUNCTION ****************************/
/*******************************************************************/
-extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
+extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
{
// set up device
int dev = 0;
@@ -480,4 +469,5 @@ extern "C" void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, f
CHECK(cudaFree(D1_ROF));
CHECK(cudaFree(D2_ROF));
CHECK(cudaFree(D3_ROF));
+ return 0;
}
diff --git a/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
index 4a19d09..a6bfcc7 100644
--- a/Core/regularisers_GPU/LLT_ROF_GPU_core.h
+++ b/Core/regularisers_GPU/LLT_ROF_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
index 8048830..ff7ce4d 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.cu
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "NonlDiff_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
* The minimisation is performed using explicit scheme.
@@ -38,18 +39,7 @@ limitations under the License.
* [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
*/
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- exit(1); \
- } \
-}
-
+
#define BLKXSIZE 8
#define BLKYSIZE 8
#define BLKZSIZE 8
@@ -295,7 +285,7 @@ __global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambda
/////////////////////////////////////////////////
// HOST FUNCTION
-extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
+extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
{
// set up device
int dev = 0;
@@ -350,5 +340,6 @@ extern "C" void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar,
CHECK(cudaMemcpy(Output,d_output,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
CHECK(cudaFree(d_input));
CHECK(cudaFree(d_output));
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/NonlDiff_GPU_core.h b/Core/regularisers_GPU/NonlDiff_GPU_core.h
index afd712b..5fe457e 100644
--- a/Core/regularisers_GPU/NonlDiff_GPU_core.h
+++ b/Core/regularisers_GPU/NonlDiff_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
index f558b0f..d173124 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.cu
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -19,7 +19,8 @@
*/
#include "PatchSelect_GPU_core.h"
-
+#include "shared.h"
+
/* CUDA implementation of non-local weight pre-calculation for non-local priors
* Weights and associated indices are stored into pre-allocated arrays and passed
* to the regulariser
@@ -36,32 +37,20 @@
* 1. AR_i - indeces of i neighbours
* 2. AR_j - indeces of j neighbours
* 3. Weights_ij - associated weights
- */
-
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
-
-#define BLKXSIZE 16
-#define BLKYSIZE 16
-#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-#define M_PI 3.14159265358979323846
-#define EPS 1.0e-8
+ */
+
+
+#define BLKXSIZE 16
+#define BLKYSIZE 16
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+#define M_PI 3.14159265358979323846
+#define EPS 1.0e-8
#define CONSTVECSIZE5 121
#define CONSTVECSIZE7 225
#define CONSTVECSIZE9 361
#define CONSTVECSIZE11 529
#define CONSTVECSIZE13 729
-
+
__device__ void swap(float *xp, float *yp)
{
float temp = *xp;
@@ -75,9 +64,9 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp)
*yp = temp;
}
-/********************************************************************************/
-__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+/********************************************************************************/
+__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -85,10 +74,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne
float Weight_Vec[CONSTVECSIZE5];
unsigned short ind_i[CONSTVECSIZE5];
unsigned short ind_j[CONSTVECSIZE5];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -139,10 +128,10 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
-/********************************************************************************/
-__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+}
+/********************************************************************************/
+__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -150,10 +139,10 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne
float Weight_Vec[CONSTVECSIZE7];
unsigned short ind_i[CONSTVECSIZE7];
unsigned short ind_j[CONSTVECSIZE7];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -204,9 +193,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne
H_j_d[index2] = ind_j[x];
Weights_d[index2] = Weight_Vec[x];
}
-}
-__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+}
+__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -214,10 +203,10 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
float Weight_Vec[CONSTVECSIZE9];
unsigned short ind_i[CONSTVECSIZE9];
unsigned short ind_j[CONSTVECSIZE9];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -269,8 +258,8 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne
Weights_d[index2] = Weight_Vec[x];
}
}
-__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -278,10 +267,10 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign
float Weight_Vec[CONSTVECSIZE11];
unsigned short ind_i[CONSTVECSIZE11];
unsigned short ind_j[CONSTVECSIZE11];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -333,8 +322,8 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign
Weights_d[index2] = Weight_Vec[x];
}
}
-__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
-{
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
float normsum;
@@ -342,10 +331,10 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign
float Weight_Vec[CONSTVECSIZE13];
unsigned short ind_i[CONSTVECSIZE13];
unsigned short ind_j[CONSTVECSIZE13];
-
- int i = blockDim.x * blockIdx.x + threadIdx.x;
- int j = blockDim.y * blockIdx.y + threadIdx.y;
-
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
long index = i*M+j;
counter = 0;
@@ -398,29 +387,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign
}
}
-
+
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
-{
- int deviceCount = -1; // number of devices
- cudaGetDeviceCount(&deviceCount);
- if (deviceCount == 0) {
- fprintf(stderr, "No CUDA devices found\n");
- return;
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+ int deviceCount = -1; // number of devices
+ cudaGetDeviceCount(&deviceCount);
+ if (deviceCount == 0) {
+ fprintf(stderr, "No CUDA devices found\n");
+ return -1;
}
-
- int SearchW_full, SimilW_full, counterG, i, j;
+
+ int SearchW_full, SimilW_full, counterG, i, j;
float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
- unsigned short *H_i_d, *H_j_d;
+ unsigned short *H_i_d, *H_j_d;
h2 = h*h;
-
- dim3 dimBlock(BLKXSIZE,BLKYSIZE);
- dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
-
- SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
- SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+ dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+
+ SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
+ SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
/* generate a 2D Gaussian kernel for NLM procedure */
Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
@@ -432,16 +421,16 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho
}} /*main neighb loop */
- /*allocate space on the device*/
- checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+ /*allocate space on the device*/
+ checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );
checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );
checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) );
- checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
-
- /* copy data from the host to the device */
+ checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
+
+ /* copy data from the host to the device */
checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) );
- checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+ checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
/********************** Run CUDA kernel here ********************/
if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
@@ -450,19 +439,20 @@ extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned sho
else if (SearchWindow == 11) IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
else if (SearchWindow == 13) IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
else {
- fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
- return;}
- checkCudaErrors(cudaPeekAtLastError() );
- checkCudaErrors(cudaDeviceSynchronize());
- /***************************************************************/
-
+ fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+ return -1;}
+ checkCudaErrors(cudaPeekAtLastError() );
+ checkCudaErrors(cudaDeviceSynchronize());
+ /***************************************************************/
+
checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );
-
+
cudaFree(Ad);
cudaFree(H_i_d);
cudaFree(H_j_d);
cudaFree(Weights_d);
- cudaFree(Eucl_Vec_d);
+ cudaFree(Eucl_Vec_d);
+ return 0;
}
diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h
index d20fe9f..8c124d3 100644
--- a/Core/regularisers_GPU/PatchSelect_GPU_core.h
+++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
#endif
diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu
index 3081011..73232a9 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.cu
+++ b/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "TGV_GPU_core.h"
+#include "shared.h"
/* CUDA implementation of Primal-Dual denoising method for
* Total Generilized Variation (TGV)-L2 model [1] (2D case only)
@@ -36,19 +37,6 @@ limitations under the License.
* References:
* [1] K. Bredies "Total Generalized Variation"
*/
-
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- exit(1); \
- } \
-}
-
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -239,7 +227,7 @@ __global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
+extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY)
{
int dimTotal, dev = 0;
CHECK(cudaSetDevice(dev));
@@ -320,4 +308,5 @@ extern "C" void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, fl
CHECK(cudaFree(V2));
CHECK(cudaFree(V1_old));
CHECK(cudaFree(V2_old));
+ return 0;
}
diff --git a/Core/regularisers_GPU/TGV_GPU_core.h b/Core/regularisers_GPU/TGV_GPU_core.h
index 663378f..5a4eb76 100644
--- a/Core/regularisers_GPU/TGV_GPU_core.h
+++ b/Core/regularisers_GPU/TGV_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
#endif
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index eab7a44..b371c5d 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "TV_FGP_GPU_core.h"
+#include "shared.h"
#include <thrust/device_vector.h>
#include <thrust/transform_reduce.h>
@@ -39,18 +40,6 @@ limitations under the License.
* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
*/
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -354,13 +343,13 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
////////////MAIN HOST FUNCTION ///////////////
-extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "No CUDA devices found\n");
- return;
+ return -1;
}
int count = 0, i;
@@ -570,5 +559,6 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
cudaFree(R2);
cudaFree(R3);
}
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.h b/Core/regularisers_GPU/TV_FGP_GPU_core.h
index 107d243..b28cdf3 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.h
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.h
@@ -5,6 +5,6 @@
#ifndef _TV_FGP_GPU_
#define _TV_FGP_GPU_
-extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
#endif
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 57de63d..76f5be9 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -35,18 +35,7 @@ limitations under the License.
*
* D. Kazantsev, 2016-18
*/
-
-#define CHECK(call) \
-{ \
- const cudaError_t error = call; \
- if (error != cudaSuccess) \
- { \
- fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
- fprintf(stderr, "code: %d, reason: %s\n", error, \
- cudaGetErrorString(error)); \
- exit(1); \
- } \
-}
+#include "shared.h"
#define BLKXSIZE 8
#define BLKYSIZE 8
@@ -304,7 +293,7 @@ __host__ __device__ int sign (float x)
/////////////////////////////////////////////////
// HOST FUNCTION
-extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
{
// set up device
int dev = 0;
@@ -364,5 +353,6 @@ extern "C" void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, in
CHECK(cudaFree(d_update));
CHECK(cudaFree(d_D1));
CHECK(cudaFree(d_D2));
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/TV_ROF_GPU_core.h b/Core/regularisers_GPU/TV_ROF_GPU_core.h
index d772aba..3a09296 100755
--- a/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ b/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
#endif
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.cu b/Core/regularisers_GPU/TV_SB_GPU_core.cu
index 68b9221..1f494ee 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -18,6 +18,7 @@ limitations under the License.
*/
#include "TV_SB_GPU_core.h"
+#include "shared.h"
#include <thrust/device_vector.h>
#include <thrust/transform_reduce.h>
@@ -39,17 +40,6 @@ limitations under the License.
*/
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -363,13 +353,13 @@ __global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
/********************* MAIN HOST FUNCTION ******************/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "No CUDA devices found\n");
- return;
+ return -1;
}
int ll, DimTotal;
@@ -557,5 +547,6 @@ extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter,
cudaFree(By);
cudaFree(Bz);
}
- //cudaDeviceReset();
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/TV_SB_GPU_core.h b/Core/regularisers_GPU/TV_SB_GPU_core.h
index bdc9219..d44ab77 100755
--- a/Core/regularisers_GPU/TV_SB_GPU_core.h
+++ b/Core/regularisers_GPU/TV_SB_GPU_core.h
@@ -5,6 +5,6 @@
#ifndef _SB_TV_GPU_
#define _SB_TV_GPU_
-extern "C" void TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
#endif
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index 80a78da..7503ec7 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
-
+#include "shared.h"
#include "dTV_FGP_GPU_core.h"
#include <thrust/device_vector.h>
#include <thrust/transform_reduce.h>
@@ -45,19 +45,6 @@ limitations under the License.
*/
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
-
-inline void __checkCudaErrors(cudaError err, const char *file, const int line)
-{
- if (cudaSuccess != err)
- {
- fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
- file, line, (int)err, cudaGetErrorString(err));
- exit(EXIT_FAILURE);
- }
-}
-
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
@@ -468,13 +455,13 @@ __global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_t
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
////////////MAIN HOST FUNCTION ///////////////
-extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "No CUDA devices found\n");
- return;
+ return -1;
}
int count = 0, i;
@@ -748,6 +735,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f
cudaFree(InputRef_y);
cudaFree(InputRef_z);
cudaFree(d_InputRef);
- }
- //cudaDeviceReset();
+ }
+ //cudaDeviceReset();
+ return 0;
}
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
index b906636..9020b1a 100644
--- a/Core/regularisers_GPU/dTV_FGP_GPU_core.h
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
@@ -5,6 +5,6 @@
#ifndef _dTV_FGP_GPU_
#define _dTV_FGP_GPU_
-extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
#endif
diff --git a/Core/regularisers_GPU/shared.h b/Core/regularisers_GPU/shared.h
new file mode 100644
index 0000000..fe98cd6
--- /dev/null
+++ b/Core/regularisers_GPU/shared.h
@@ -0,0 +1,42 @@
+/*shared macros*/
+
+
+/*checks CUDA call, should be used in functions returning <int> value
+if error happens, writes to standard error and explicitly returns -1*/
+#define CHECK(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ return -1; \
+ } \
+}
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ return -1; \
+ } \
+}
+/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+ file, line, (int)err, cudaGetErrorString(err));
+ return;
+ }
+}
+*/
+
diff --git a/Readme.md b/Readme.md
index 089c9fe..cdf823d 100644
--- a/Readme.md
+++ b/Readme.md
@@ -69,7 +69,6 @@ Here an example of build on Linux (see also `run.sh` for additional info):
```bash
git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit.git
-mkdir build
cd build
cmake .. -DCONDA_BUILD=OFF -DBUILD_MATLAB_WRAPPER=ON -DBUILD_PYTHON_WRAPPER=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install
make install
@@ -88,7 +87,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge
#### Python (conda-build)
```
- export CIL_VERSION=0.10.2
+ export CIL_VERSION=0.10.3
conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5
conda install ccpi-regulariser=${CIL_VERSION} --use-local --force
cd demos/
@@ -124,7 +123,7 @@ On Windows the `dll` and the mex modules must reside in the same directory. It i
addpath(/path/to/library);
```
-#### Legacy Matlab installation
+#### Legacy Matlab installation (partly supported, please use Cmake)
```
cd /Wrappers/Matlab/mex_compile
diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
index e0311ea..dd1475c 100644
--- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m
+++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
@@ -7,11 +7,10 @@
% In the code bellow we provide a full explicit path to nvcc compiler
% ! paths to matlab and CUDA sdk can be different, modify accordingly !
-% Tested on Ubuntu 16.04/MATLAB 2016b/cuda7.5/gcc4.9
-
-% Installation HAS NOT been tested on Windows, please contact me if you'll be able to
-% install software on Windows and I gratefully include it into the master release.
+% Tested on Ubuntu 18.04/MATLAB 2016b/cuda10.0/gcc7.3
+% Installation HAS NOT been tested on Windows, please you Cmake build or
+% modify the code bellow accordingly
fsep = '/';
pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_GPU'], 1i);
@@ -28,44 +27,45 @@ fprintf('%s \n', '<<<<<<<<<<<Compiling GPU regularisers (CUDA)>>>>>>>>>>>>>');
fprintf('%s \n', 'Compiling ROF-TV...');
!/usr/local/cuda/bin/nvcc -O0 -c TV_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o
movefile('ROF_TV_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling FGP-TV...');
!/usr/local/cuda/bin/nvcc -O0 -c TV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o
movefile('FGP_TV_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling SB-TV...');
!/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o
movefile('SB_TV_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling TGV...');
!/usr/local/cuda/bin/nvcc -O0 -c TGV_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o
movefile('TGV_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling dFGP-TV...');
!/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
movefile('FGP_dTV_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling NonLinear Diffusion...');
!/usr/local/cuda/bin/nvcc -O0 -c NonlDiff_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o
movefile('NonlDiff_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
!/usr/local/cuda/bin/nvcc -O0 -c Diffus_4thO_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o
movefile('Diffusion_4thO_GPU.mex*',Pathmove);
fprintf('%s \n', 'Compiling ROF-LLT...');
!/usr/local/cuda/bin/nvcc -O0 -c LLT_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
-mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o
movefile('LLT_ROF_GPU.mex*',Pathmove);
+
delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* LLT_ROF_GPU_core* CCPiDefines.h
fprintf('%s \n', 'All successfully compiled!');
diff --git a/Wrappers/Python/conda-recipe/build.sh b/Wrappers/Python/conda-recipe/build.sh
index 54bc8e2..eec7c2f 100644
--- a/Wrappers/Python/conda-recipe/build.sh
+++ b/Wrappers/Python/conda-recipe/build.sh
@@ -4,7 +4,7 @@ cp -rv "$RECIPE_DIR/../.." "$SRC_DIR/ccpi"
cp -rv "$RECIPE_DIR/../../../Core" "$SRC_DIR/Core"
cd $SRC_DIR
-
+##cuda=off
cmake -G "Unix Makefiles" $RECIPE_DIR/../../../ -DBUILD_PYTHON_WRAPPER=ON -DCONDA_BUILD=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=$CONDA_PREFIX/lib -DLIBRARY_INC=$CONDA_PREFIX -DCMAKE_INSTALL_PREFIX=$PREFIX
diff --git a/Wrappers/Python/conda-recipe/meta.yaml b/Wrappers/Python/conda-recipe/meta.yaml
index ed73165..808493e 100644
--- a/Wrappers/Python/conda-recipe/meta.yaml
+++ b/Wrappers/Python/conda-recipe/meta.yaml
@@ -1,6 +1,6 @@
package:
name: ccpi-regulariser
- version: 0.10.2
+ version: 0.10.3
build:
diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py
index 499ae7f..cfb3f53 100755
--- a/Wrappers/Python/conda-recipe/run_test.py
+++ b/Wrappers/Python/conda-recipe/run_test.py
@@ -2,7 +2,7 @@ import unittest
import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
from PIL import Image
class TiffReader(object):
@@ -37,6 +37,8 @@ class TestRegularisers(unittest.TestCase):
def test_ROF_TV_CPU_vs_GPU(self):
+ #print ("tomas debug test function")
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -63,11 +65,11 @@ class TestRegularisers(unittest.TestCase):
# set parameters
pars = {'algorithm': ROF_TV, \
- 'input' : u0,\
- 'regularisation_parameter':0.04,\
- 'number_of_iterations': 1000,\
- 'time_marching_parameter': 0.0001
- }
+ 'input' : u0,\
+ 'regularisation_parameter':0.04,\
+ 'number_of_iterations': 2500,\
+ 'time_marching_parameter': 0.00002
+ }
print ("#############ROF TV CPU####################")
start_time = timeit.default_timer()
rof_cpu = ROF_TV(pars['input'],
@@ -88,8 +90,8 @@ class TestRegularisers(unittest.TestCase):
pars['number_of_iterations'],
pars['time_marching_parameter'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, rof_gpu)
pars['rmse'] = rms
pars['algorithm'] = ROF_TV
@@ -101,10 +103,10 @@ class TestRegularisers(unittest.TestCase):
diff_im = np.zeros(np.shape(rof_cpu))
diff_im = abs(rof_cpu - rof_gpu)
diff_im[diff_im > tolerance] = 1
-
self.assertLessEqual(diff_im.sum() , 1)
def test_FGP_TV_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -169,10 +171,10 @@ class TestRegularisers(unittest.TestCase):
pars['methodTV'],
pars['nonneg'],
pars['printingOut'],'gpu')
-
+
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, fgp_gpu)
pars['rmse'] = rms
pars['algorithm'] = FGP_TV
@@ -189,6 +191,7 @@ class TestRegularisers(unittest.TestCase):
self.assertLessEqual(diff_im.sum() , 1)
def test_SB_TV_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -251,10 +254,10 @@ class TestRegularisers(unittest.TestCase):
pars['tolerance_constant'],
pars['methodTV'],
pars['printingOut'],'gpu')
-
+
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, sb_gpu)
pars['rmse'] = rms
pars['algorithm'] = SB_TV
@@ -269,6 +272,7 @@ class TestRegularisers(unittest.TestCase):
self.assertLessEqual(diff_im.sum(), 1)
def test_TGV_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -329,10 +333,10 @@ class TestRegularisers(unittest.TestCase):
pars['alpha0'],
pars['number_of_iterations'],
pars['LipshitzConstant'],'gpu')
-
+
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, tgv_gpu)
pars['rmse'] = rms
pars['algorithm'] = TGV
@@ -347,6 +351,7 @@ class TestRegularisers(unittest.TestCase):
self.assertLessEqual(diff_im.sum() , 1)
def test_LLT_ROF_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -405,8 +410,8 @@ class TestRegularisers(unittest.TestCase):
pars['time_marching_parameter'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms = rmse(Im, lltrof_gpu)
pars['rmse'] = rms
pars['algorithm'] = LLT_ROF
@@ -421,6 +426,7 @@ class TestRegularisers(unittest.TestCase):
self.assertLessEqual(diff_im.sum(), 1)
def test_NDF_CPU_vs_GPU(self):
+ print(__name__)
filename = os.path.join("lena_gray_512.tif")
plt = TiffReader()
# read image
@@ -483,8 +489,7 @@ class TestRegularisers(unittest.TestCase):
pars['penalty_type'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, ndf_gpu)
pars['rmse'] = rms
pars['algorithm'] = NDF
@@ -557,8 +562,7 @@ class TestRegularisers(unittest.TestCase):
pars['time_marching_parameter'], 'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, diff4th_gpu)
pars['rmse'] = rms
pars['algorithm'] = DIFF4th
@@ -603,8 +607,8 @@ class TestRegularisers(unittest.TestCase):
'input' : u0,\
'refdata' : u_ref,\
'regularisation_parameter':0.04, \
- 'number_of_iterations' :2000 ,\
- 'tolerance_constant':1e-06,\
+ 'number_of_iterations' :1000 ,\
+ 'tolerance_constant':1e-07,\
'eta_const':0.2,\
'methodTV': 0 ,\
'nonneg': 0 ,\
@@ -643,8 +647,7 @@ class TestRegularisers(unittest.TestCase):
pars['nonneg'],
pars['printingOut'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms = rmse(Im, fgp_dtv_gpu)
pars['rmse'] = rms
pars['algorithm'] = FGP_dTV
@@ -765,8 +768,8 @@ class TestRegularisers(unittest.TestCase):
pars_rof_tv['number_of_iterations'],
pars_rof_tv['time_marching_parameter'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
+
rms_rof = rmse(Im, rof_gpu)
# now compare obtained rms with the expected value
self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance)
@@ -806,10 +809,10 @@ class TestRegularisers(unittest.TestCase):
pars_fgp_tv['nonneg'],
pars_fgp_tv['printingOut'],'gpu')
except ValueError as ve:
- self.assertTrue(True)
- return
+ self.skipTest("Results not comparable. GPU computing error.")
rms_fgp = rmse(Im, fgp_gpu)
# now compare obtained rms with the expected value
+
self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance)
if __name__ == '__main__':
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index 616eab0..6529b5c 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -656,8 +656,8 @@ pars = {'algorithm' : FGP_dTV, \
'input' : u0,\
'refdata' : u_ref,\
'regularisation_parameter':0.04, \
- 'number_of_iterations' :2000 ,\
- 'tolerance_constant':1e-06,\
+ 'number_of_iterations' :1000 ,\
+ 'tolerance_constant':1e-07,\
'eta_const':0.2,\
'methodTV': 0 ,\
'nonneg': 0 ,\
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index 302727e..2b97865 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -18,15 +18,17 @@ import cython
import numpy as np
cimport numpy as np
-cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
-cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
-cdef extern void TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
-cdef extern void TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
-cdef extern void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
-cdef extern void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
-cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
-cdef extern void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
-cdef extern void PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+CUDAErrorMessage = 'CUDA error'
+
+cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
+cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY);
+cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
# Total-variation Rudin-Osher-Fatemi (ROF)
def TV_ROF_GPU(inputData,
@@ -186,15 +188,16 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
np.zeros([dims[0],dims[1]], dtype='float32')
- # Running CUDA code here
- TV_ROF_GPU_main(
+ # Running CUDA code here
+ if (TV_ROF_GPU_main(
&inputData[0,0], &outputData[0,0],
regularisation_parameter,
iterations ,
time_marching_parameter,
- dims[1], dims[0], 1);
-
- return outputData
+ dims[1], dims[0], 1)==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
@@ -210,14 +213,15 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
# Running CUDA code here
- TV_ROF_GPU_main(
+ if (TV_ROF_GPU_main(
&inputData[0,0,0], &outputData[0,0,0],
regularisation_parameter,
iterations ,
time_marching_parameter,
- dims[2], dims[1], dims[0]);
-
- return outputData
+ dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
#****************************************************************#
#********************** Total-variation FGP *********************#
#****************************************************************#
@@ -238,16 +242,18 @@ def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0],dims[1]], dtype='float32')
# Running CUDA code here
- TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],
+ if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],
regularisation_parameter,
iterations,
tolerance_param,
methodTV,
nonneg,
printM,
- dims[1], dims[0], 1);
-
- return outputData
+ dims[1], dims[0], 1)==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
@@ -266,16 +272,18 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
# Running CUDA code here
- TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
+ if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
regularisation_parameter ,
iterations,
tolerance_param,
methodTV,
nonneg,
printM,
- dims[2], dims[1], dims[0]);
-
- return outputData
+ dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#***************************************************************#
#********************** Total-variation SB *********************#
#***************************************************************#
@@ -295,15 +303,17 @@ def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0],dims[1]], dtype='float32')
# Running CUDA code here
- TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],
+ if (TV_SB_GPU_main(&inputData[0,0], &outputData[0,0],
regularisation_parameter,
iterations,
tolerance_param,
methodTV,
printM,
- dims[1], dims[0], 1);
-
- return outputData
+ dims[1], dims[0], 1)==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
@@ -321,15 +331,17 @@ def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
# Running CUDA code here
- TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
+ if (TV_SB_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
regularisation_parameter ,
iterations,
tolerance_param,
methodTV,
printM,
- dims[2], dims[1], dims[0]);
-
- return outputData
+ dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#***************************************************************#
#************************ LLT-ROF model ************************#
@@ -349,8 +361,11 @@ def LLT_ROF_GPU2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0],dims[1]], dtype='float32')
# Running CUDA code here
- LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1);
- return outputData
+ if (LLT_ROF_GPU_main(&inputData[0,0], &outputData[0,0],regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[1],dims[0],1)==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameterROF,
@@ -367,8 +382,11 @@ def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
# Running CUDA code here
- LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0]);
- return outputData
+ if (LLT_ROF_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter, dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#***************************************************************#
@@ -389,13 +407,16 @@ def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0],dims[1]], dtype='float32')
#/* Run TGV iterations for 2D data */
- TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,
+ if (TGV_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter,
alpha1,
alpha0,
iterationsNumb,
LipshitzConst,
- dims[1],dims[0])
- return outputData
+ dims[1],dims[0])==0):
+ return outputData
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#****************************************************************#
#**************Directional Total-variation FGP ******************#
@@ -419,7 +440,7 @@ def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0],dims[1]], dtype='float32')
# Running CUDA code here
- dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
+ if (dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
regularisation_parameter,
iterations,
tolerance_param,
@@ -427,9 +448,11 @@ def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
methodTV,
nonneg,
printM,
- dims[1], dims[0], 1);
-
- return outputData
+ dims[1], dims[0], 1)==0):
+ return outputData
+ else:
+ raise ValueError(CUDAErrorMessage);
+
def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
@@ -450,7 +473,7 @@ def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
# Running CUDA code here
- dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
+ if (dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
regularisation_parameter ,
iterations,
tolerance_param,
@@ -458,8 +481,11 @@ def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
methodTV,
nonneg,
printM,
- dims[2], dims[1], dims[0]);
- return outputData
+ dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
#****************************************************************#
#***************Nonlinear (Isotropic) Diffusion******************#
@@ -483,8 +509,11 @@ def NDF_GPU_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
# Run Nonlinear Diffusion iterations for 2D data
# Running CUDA code here
- NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)
- return outputData
+ if (NonlDiff_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
@@ -502,9 +531,11 @@ def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
# Run Nonlinear Diffusion iterations for 3D data
# Running CUDA code here
- NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])
+ if (NonlDiff_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
- return outputData
#****************************************************************#
#************Anisotropic Fourth-Order diffusion******************#
#****************************************************************#
@@ -522,8 +553,11 @@ def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
# Run Anisotropic Fourth-Order diffusion for 2D data
# Running CUDA code here
- Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)
- return outputData
+ if (Diffus4th_GPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[1], dims[0], 1)==0):
+ return outputData
+ else:
+ raise ValueError(CUDAErrorMessage);
+
def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
@@ -540,9 +574,11 @@ def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
# Run Anisotropic Fourth-Order diffusion for 3D data
# Running CUDA code here
- Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])
+ if (Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0])==0):
+ return outputData;
+ else:
+ raise ValueError(CUDAErrorMessage);
- return outputData
#****************************************************************#
#************Patch-based weights pre-selection******************#
#****************************************************************#
@@ -571,6 +607,8 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
np.zeros([dims[0], dims[1],dims[2]], dtype='uint16')
# Run patch-based weight selection function
- PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow, neighbours, edge_parameter)
-
- return H_i, H_j, Weights
+ if (PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow, neighbours, edge_parameter)==0):
+ return H_i, H_j, Weights;
+ else:
+ raise ValueError(CUDAErrorMessage);
+
diff --git a/build/jenkins-build.sh b/build/jenkins-build.sh
index 04f8da6..0c397b1 100755
--- a/build/jenkins-build.sh
+++ b/build/jenkins-build.sh
@@ -1,12 +1,29 @@
#!/usr/bin/env bash
# Script to builds source code in Jenkins environment
+module try-load conda
-module avail
-module load conda
-# it expects that git clone is done before this script launch
+# install miniconda if the module is not present
+if hash conda 2>/dev/null; then
+ echo using conda
+else
+ if [ ! -f Miniconda3-latest-Linux-x86_64.sh ]; then
+ wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+ chmod +x Miniconda3-latest-Linux-x86_64.sh
+ fi
+ ./Miniconda3-latest-Linux-x86_64.sh -u -b -p .
+ PATH=$PATH:./bin
+fi
+
+# presume that git clone is done before this script launch
# git clone https://github.com/vais-ral/CCPi-Regularisation-Toolkit
-conda install conda-build
+conda install -y conda-build
#export CIL_VERSION=0.10.2
-export CIL_VERSION=0.10.2
-cd CCPi-Regularisation-Toolkit
+if [[ -n ${CIL_VERSION} ]]
+then
+ echo Using defined version: $CIL_VERSION
+else
+ export CIL_VERSION=0.10.3
+ echo Defining version: $CIL_VERSION
+fi
+#cd CCPi-Regularisation-Toolkit # already there by jenkins
conda build Wrappers/Python/conda-recipe
diff --git a/run.sh b/run.sh
index 98b792e..a8e5555 100644
--- a/run.sh
+++ b/run.sh
@@ -3,7 +3,7 @@ echo "Building CCPi-regularisation Toolkit using CMake"
# rm -r build
# Requires Cython, install it first:
# pip install cython
-mkdir build
+# mkdir build
cd build/
make clean
# install Python modules only without CUDA