diff options
author | algol <dkazanc@hotmail.com> | 2018-04-12 11:56:54 +0100 |
---|---|---|
committer | algol <dkazanc@hotmail.com> | 2018-04-12 11:56:54 +0100 |
commit | 22f6e22cbe6db04c6bbe8d259ce761e3748d7102 (patch) | |
tree | 225dcf0db9dc7e0f0fc5fc001a7efb14c19658f8 | |
parent | 58f5ce047b063d53906e38047b6ae744ccdbd4eb (diff) | |
download | regularization-22f6e22cbe6db04c6bbe8d259ce761e3748d7102.tar.gz regularization-22f6e22cbe6db04c6bbe8d259ce761e3748d7102.tar.bz2 regularization-22f6e22cbe6db04c6bbe8d259ce761e3748d7102.tar.xz regularization-22f6e22cbe6db04c6bbe8d259ce761e3748d7102.zip |
dTV some bugs in cython
-rw-r--r-- | Core/regularisers_CPU/FGP_dTV_core.c | 32 | ||||
-rw-r--r-- | Core/regularisers_CPU/FGP_dTV_core.h | 16 | ||||
-rw-r--r-- | Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 90 | ||||
-rw-r--r-- | Wrappers/Python/ccpi/filters/regularisers.py | 4 | ||||
-rw-r--r-- | Wrappers/Python/conda-recipe/run_test.py.in (renamed from Wrappers/Python/conda-recipe/run_test.py) | 17 | ||||
-rw-r--r-- | Wrappers/Python/conda-recipe/testLena.npy | bin | 0 -> 1048656 bytes | |||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_regularisers.py | 9 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py | 2 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_gpu_regularisers.py | 8 | ||||
-rw-r--r-- | Wrappers/Python/src/cpu_regularisers.pyx | 2 | ||||
-rw-r--r-- | Wrappers/Python/src/gpu_regularisers.pyx | 7 |
11 files changed, 96 insertions, 91 deletions
diff --git a/Core/regularisers_CPU/FGP_dTV_core.c b/Core/regularisers_CPU/FGP_dTV_core.c index b182d46..f6b4f79 100644 --- a/Core/regularisers_CPU/FGP_dTV_core.c +++ b/Core/regularisers_CPU/FGP_dTV_core.c @@ -75,20 +75,20 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, dimX, dimY); /* computing the gradient of the objective function */ - Obj_func2D(Input, Output, R1, R2, lambdaPar, dimX, dimY); + Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, dimX, dimY); /* apply nonnegativity */ if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;} /*Taking a step towards minus of the gradient*/ - Grad_func2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, dimX, dimY); + Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, dimX, dimY); /* projection step */ - Proj_func2D(P1, P2, methodTV, DimTotal); + Proj_dfunc2D(P1, P2, methodTV, DimTotal); /*updating R and t*/ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal); + Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal); /* check early stopping criteria */ re = 0.0f; re1 = 0.0f; @@ -139,20 +139,20 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ); /* computing the gradient of the objective function */ - Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ); + Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ); /* apply nonnegativity */ if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;} /*Taking a step towards minus of the gradient*/ - Grad_func3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, dimX, dimY, dimZ); + Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, dimX, dimY, dimZ); /* projection step */ - Proj_func3D(P1, P2, P3, methodTV, DimTotal); + Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal); /*updating R and t*/ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal); + Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal); /* calculate norm - stopping rules*/ re = 0.0f; re1 = 0.0f; @@ -220,7 +220,7 @@ float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX, return 1; } -float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY) +float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY) { float val1, val2; int i,j,index; @@ -235,7 +235,7 @@ float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dim }} return *D; } -float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY) +float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY) { float val1, val2, multip, in_prod; int i,j,index; @@ -258,7 +258,7 @@ float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B }} return 1; } -float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal) +float Proj_dfunc2D(float *P1, float *P2, int methTV, int DimTotal) { float val1, val2, denom, sq_denom; int i; @@ -288,7 +288,7 @@ float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal) } return 1; } -float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal) +float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal) { int i; float multip; @@ -348,7 +348,7 @@ float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y return 1; } -float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ) +float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ) { float val1, val2, val3; int i,j,k,index; @@ -365,7 +365,7 @@ float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lamb }}} return *D; } -float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ) +float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ) { float val1, val2, val3, multip, in_prod; int i,j,k, index; @@ -391,7 +391,7 @@ float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R }}} return 1; } -float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal) +float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, int DimTotal) { float val1, val2, val3, denom, sq_denom; int i; @@ -425,7 +425,7 @@ float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal) } return 1; } -float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal) +float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal) { int i; float multip; diff --git a/Core/regularisers_CPU/FGP_dTV_core.h b/Core/regularisers_CPU/FGP_dTV_core.h index ff9d8be..95dc249 100644 --- a/Core/regularisers_CPU/FGP_dTV_core.h +++ b/Core/regularisers_CPU/FGP_dTV_core.h @@ -56,17 +56,17 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, int dimX, int dimY); CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX, int dimY); -CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); -CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY); -CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal); -CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal); +CCPI_EXPORT float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); +CCPI_EXPORT float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY); +CCPI_EXPORT float Proj_dfunc2D(float *P1, float *P2, int methTV, int DimTotal); +CCPI_EXPORT float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal); CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, int dimX, int dimY, int dimZ); CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, int dimX, int dimY, int dimZ); -CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); -CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ); -CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal); -CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal); +CCPI_EXPORT float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); +CCPI_EXPORT float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ); +CCPI_EXPORT float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, int DimTotal); +CCPI_EXPORT float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal); #ifdef __cplusplus } #endif diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu index 2b450e6..04047a5 100644 --- a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -72,7 +72,7 @@ struct square { __host__ __device__ float operator()(float x) { return x * x; } /*****************2D modules*********************/ /************************************************/ -__global__ void GradNorm_func2D(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize) +__global__ void GradNorm_func2D_kernel(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize) { float val1, val2, gradX, gradY, magn; @@ -97,7 +97,7 @@ __global__ void GradNorm_func2D(float *Refd, float *Refd_x, float *Refd_y, float return; } -__global__ void ProjectVect_func2D(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize) +__global__ void ProjectVect_func2D_kernel(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize) { float in_prod; @@ -116,7 +116,7 @@ __global__ void ProjectVect_func2D(float *R1, float *R2, float *Refd_x, float *R } -__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) +__global__ void Obj_dfunc2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) { float val1,val2; @@ -137,7 +137,7 @@ __global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int return; } -__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip) +__global__ void Grad_dfunc2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip) { float val1,val2,in_prod; @@ -165,7 +165,7 @@ __global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, fl return; } -__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) +__global__ void Proj_dfunc2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) { float denom; @@ -184,7 +184,7 @@ __global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int I } return; } -__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) +__global__ void Proj_dfunc2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) { float val1, val2; @@ -204,7 +204,7 @@ __global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int } return; } -__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) +__global__ void Rupd_dfunc2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) { //calculate each thread global index const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; @@ -218,7 +218,7 @@ __global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P } return; } -__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) +__global__ void dTVnonneg2D_kernel(float* Output, int N, int M, int num_total) { int xIndex = blockDim.x * blockIdx.x + threadIdx.x; int yIndex = blockDim.y * blockIdx.y + threadIdx.y; @@ -229,7 +229,7 @@ __global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) if (Output[index] < 0.0f) Output[index] = 0.0f; } } -__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +__global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total) { int xIndex = blockDim.x * blockIdx.x + threadIdx.x; int yIndex = blockDim.y * blockIdx.y + threadIdx.y; @@ -240,7 +240,7 @@ __global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num Output[index] = Input[index]; } } -__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +__global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) { int xIndex = blockDim.x * blockIdx.x + threadIdx.x; int yIndex = blockDim.y * blockIdx.y + threadIdx.y; @@ -254,7 +254,7 @@ __global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, /************************************************/ /*****************3D modules*********************/ /************************************************/ -__global__ void GradNorm_func3D(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize) +__global__ void GradNorm_func3D_kernel(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize) { float val1, val2, val3, gradX, gradY, gradZ, magn; @@ -283,7 +283,7 @@ __global__ void GradNorm_func3D(float *Refd, float *Refd_x, float *Refd_y, float return; } -__global__ void ProjectVect_func3D(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize) +__global__ void ProjectVect_func3D_kernel(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize) { float in_prod; @@ -305,7 +305,7 @@ __global__ void ProjectVect_func3D(float *R1, float *R2, float *R3, float *Refd_ } -__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) +__global__ void Obj_dfunc3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) { float val1,val2,val3; @@ -327,7 +327,7 @@ __global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, flo return; } -__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip) +__global__ void Grad_dfunc3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip) { float val1,val2,val3,in_prod; @@ -358,7 +358,7 @@ __global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, fl return; } -__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +__global__ void Proj_dfunc3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) { float denom,sq_denom; @@ -382,7 +382,7 @@ __global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, i return; } -__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +__global__ void Proj_dfunc3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) { float val1, val2, val3; @@ -408,7 +408,7 @@ __global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, } -__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) +__global__ void Rupd_dfunc3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) { //calculate each thread global index int i = blockDim.x * blockIdx.x + threadIdx.x; @@ -425,7 +425,7 @@ __global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P return; } -__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) +__global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) { int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -438,7 +438,7 @@ __global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_tota } } -__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) +__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total) { int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -451,7 +451,7 @@ __global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, } } -__global__ void ResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) +__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) { int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; @@ -517,7 +517,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f /******************** Run CUDA 2D kernel here ********************/ multip = (1.0f/(8.0f*lambdaPar)); /* calculate gradient vectors for the reference */ - GradNorm_func2D<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize); + GradNorm_func2D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -525,41 +525,41 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f for (i = 0; i < iter; i++) { /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/ - ProjectVect_func2D<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize); + ProjectVect_func2D_kernel<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* computing the gradient of the objective function */ - Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); + Obj_dfunc2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (nonneg != 0) { - nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize); + dTVnonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } /*Taking a step towards minus of the gradient*/ - Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip); + Grad_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* projection step */ - if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ - else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + if (methodTV == 0) Proj_dfunc2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_dfunc2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; multip2 = ((tk-1.0f)/tkp1); - Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + Rupd_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (epsil != 0.0f) { /* calculate norm - stopping rules using the Thrust library */ - ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + dTVResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -572,16 +572,16 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f if (re < epsil) count++; if (count > 4) break; - copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize); + dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } - copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize); + dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize); + dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -651,7 +651,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f /********************** Run CUDA 3D kernel here ********************/ multip = (1.0f/(26.0f*lambdaPar)); /* calculate gradient vectors for the reference */ - GradNorm_func3D<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize); + GradNorm_func3D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -659,41 +659,41 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f for (i = 0; i < iter; i++) { /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/ - ProjectVect_func3D<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize); + ProjectVect_func3D_kernel<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* computing the gradient of the objective function */ - Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); + Obj_dfunc3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (nonneg != 0) { - nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize); + dTVnonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } /*Taking a step towards minus of the gradient*/ - Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip); + Grad_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* projection step */ - if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ - else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + if (methodTV == 0) Proj_dfunc3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_dfunc3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; multip2 = ((tk-1.0f)/tkp1); - Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + Rupd_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (epsil != 0.0f) { /* calculate norm - stopping rules using the Thrust library */ - ResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize); + dTVResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -706,20 +706,20 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f if (re < epsil) count++; if (count > 4) break; - copy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } - copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py index c6723fa..376cc9c 100644 --- a/Wrappers/Python/ccpi/filters/regularisers.py +++ b/Wrappers/Python/ccpi/filters/regularisers.py @@ -2,8 +2,8 @@ script which assigns a proper device core function based on a flag ('cpu' or 'gpu') """ -from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU dTV_FGP_CPU -from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU dTV_FGP_GPU +from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU +from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU def ROF_TV(inputData, regularisation_parameter, iterations, time_marching_parameter,device='cpu'): diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py.in index 04bbd40..9a6f4de 100644 --- a/Wrappers/Python/conda-recipe/run_test.py +++ b/Wrappers/Python/conda-recipe/run_test.py.in @@ -1,8 +1,6 @@ import unittest import numpy as np -import os from ccpi.filters.regularisers import ROF_TV, FGP_TV -import matplotlib.pyplot as plt def rmse(im1, im2): rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size)) @@ -14,13 +12,16 @@ class TestRegularisers(unittest.TestCase): pass def test_cpu_regularisers(self): - filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") + #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy") + Im = np.load('testLena.npy'); + """ # read noiseless image Im = plt.imread(filename) Im = np.asarray(Im, dtype='float32') Im = Im/255 + """ tolerance = 1e-05 rms_rof_exp = 0.006812507 #expected value for ROF model rms_fgp_exp = 0.019152347 #expected value for FGP model @@ -80,13 +81,11 @@ class TestRegularisers(unittest.TestCase): """ self.assertTrue(res) def test_gpu_regularisers(self): - filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") + #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy") - # read noiseless image - Im = plt.imread(filename) - Im = np.asarray(Im, dtype='float32') + Im = np.load('testLena.npy'); - Im = Im/255 + #Im = Im/255 tolerance = 1e-05 rms_rof_exp = 0.006812507 #expected value for ROF model rms_fgp_exp = 0.019152347 #expected value for FGP model @@ -146,4 +145,4 @@ class TestRegularisers(unittest.TestCase): """ self.assertTrue(res) if __name__ == '__main__': - unittest.main()
\ No newline at end of file + unittest.main() diff --git a/Wrappers/Python/conda-recipe/testLena.npy b/Wrappers/Python/conda-recipe/testLena.npy Binary files differnew file mode 100644 index 0000000..14bc0e3 --- /dev/null +++ b/Wrappers/Python/conda-recipe/testLena.npy diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py index fd3050c..00beb0b 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers.py @@ -22,6 +22,8 @@ def printParametersToString(pars): txt += "{0} = {1}".format(key, value.__name__) elif key == 'input': txt += "{0} = {1}".format(key, np.shape(value)) + elif key == 'refdata': + txt += "{0} = {1}".format(key, np.shape(value)) else: txt += "{0} = {1}".format(key, value) txt += '\n' @@ -196,7 +198,7 @@ plt.title('{}'.format('CPU results')) # Uncomment to test 3D regularisation performance #%% - +""" N = 512 slices = 20 @@ -318,8 +320,8 @@ a.set_title('Noisy Image') imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") # set parameters -pars = {'algorithm' : FGP_dTV, \ - 'input' : noisyVol,\ +pars = {'algorithm' : FGP_dTV,\ + 'input' : noisyVol,\ 'refdata' : noisyRef,\ 'regularisation_parameter':0.04, \ 'number_of_iterations' :300 ,\ @@ -358,4 +360,5 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV')) +""" #%% diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index aa1f865..310cf75 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -22,6 +22,8 @@ def printParametersToString(pars): txt += "{0} = {1}".format(key, value.__name__) elif key == 'input': txt += "{0} = {1}".format(key, np.shape(value)) + elif key == 'refdata': + txt += "{0} = {1}".format(key, np.shape(value)) else: txt += "{0} = {1}".format(key, value) txt += '\n' diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py index 4759cc3..24a3c88 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers.py @@ -22,6 +22,8 @@ def printParametersToString(pars): txt += "{0} = {1}".format(key, value.__name__) elif key == 'input': txt += "{0} = {1}".format(key, np.shape(value)) + elif key == 'refdata': + txt += "{0} = {1}".format(key, np.shape(value)) else: txt += "{0} = {1}".format(key, value) txt += '\n' @@ -192,7 +194,7 @@ plt.title('{}'.format('GPU results')) # Uncomment to test 3D regularisation performance #%% - +""" N = 512 slices = 20 @@ -314,7 +316,7 @@ imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") # set parameters pars = {'algorithm' : FGP_dTV, \ - 'input' : noisyVol,\ + 'input' : noisyVol,\ 'refdata' : noisyRef,\ 'regularisation_parameter':0.04, \ 'number_of_iterations' :300 ,\ @@ -352,5 +354,5 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV')) - +""" #%% diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx index 8f9185a..1661375 100644 --- a/Wrappers/Python/src/cpu_regularisers.pyx +++ b/Wrappers/Python/src/cpu_regularisers.pyx @@ -156,8 +156,8 @@ def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter, iterationsNumb, tolerance_param, - methodTV, eta_const, + methodTV, nonneg, printM, dims[0], dims[1], 1) diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 4a14f69..18efdcd 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -20,7 +20,7 @@ cimport numpy as np cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); -cdef extern void dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); # Total-variation Rudin-Osher-Fatemi (ROF) def TV_ROF_GPU(inputData, @@ -187,8 +187,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_FGP_GPU_main( - &inputData[0,0,0], &outputData[0,0,0], + TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter , iterations, tolerance_param, @@ -204,7 +203,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, #****************************************************************# #******** Directional TV Fast-Gradient-Projection (FGP)*********# def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, - np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, + np.ndarray[np.float32_t, ndim=2, mode="c"] refdata, float regularisation_parameter, int iterations, float tolerance_param, |