From 58f5ce047b063d53906e38047b6ae744ccdbd4eb Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Thu, 12 Apr 2018 10:25:21 +0100 Subject: dTV method added --- Core/CMakeLists.txt | 2 + Core/regularisers_CPU/FGP_dTV_core.c | 440 ++++++++++++ Core/regularisers_CPU/FGP_dTV_core.h | 72 ++ Core/regularisers_GPU/TV_FGP_GPU_core.cu | 8 +- Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 751 +++++++++++++++++++++ Core/regularisers_GPU/dTV_FGP_GPU_core.h | 10 + Readme.md | 2 + Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m | 45 +- Wrappers/Matlab/demos/demoMatlab_denoise.m | 37 +- Wrappers/Matlab/mex_compile/compileCPU_mex.m | 5 +- Wrappers/Matlab/mex_compile/compileGPU_mex.m | 6 +- .../Matlab/mex_compile/regularisers_CPU/FGP_TV.c | 2 +- .../Matlab/mex_compile/regularisers_CPU/FGP_TV.c~ | 91 --- .../Matlab/mex_compile/regularisers_CPU/FGP_dTV.c | 113 ++++ .../mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp | 111 +++ Wrappers/Python/ccpi/filters/regularisers.py | 29 +- Wrappers/Python/conda-recipe/run_test.py | 149 ++++ Wrappers/Python/demos/demo_cpu_regularisers.py | 127 +++- .../Python/demos/demo_cpu_vs_gpu_regularisers.py | 101 ++- Wrappers/Python/demos/demo_gpu_regularisers.py | 123 +++- Wrappers/Python/setup-regularisers.py.in | 1 + Wrappers/Python/src/cpu_regularisers.pyx | 71 +- Wrappers/Python/src/gpu_regularisers.pyx | 98 ++- .../Python/test/__pycache__/metrics.cpython-35.pyc | Bin 823 -> 0 bytes Wrappers/Python/test/run_test.py | 149 ---- 25 files changed, 2264 insertions(+), 279 deletions(-) create mode 100644 Core/regularisers_CPU/FGP_dTV_core.c create mode 100644 Core/regularisers_CPU/FGP_dTV_core.h create mode 100644 Core/regularisers_GPU/dTV_FGP_GPU_core.cu create mode 100644 Core/regularisers_GPU/dTV_FGP_GPU_core.h delete mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~ create mode 100644 Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c create mode 100644 Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp create mode 100644 
Wrappers/Python/conda-recipe/run_test.py delete mode 100644 Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc delete mode 100644 Wrappers/Python/test/run_test.py diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index 3bc5ceb..26912b9 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -89,6 +89,7 @@ add_library(cilreg SHARED #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SplitBregman_TV_core.c #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_PD_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c + ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/utils.c ) target_link_libraries(cilreg ${EXTRA_LIBRARIES} ) @@ -129,6 +130,7 @@ if (CUDA_FOUND) CUDA_ADD_LIBRARY(cilregcuda SHARED ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu ) if (UNIX) message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") diff --git a/Core/regularisers_CPU/FGP_dTV_core.c b/Core/regularisers_CPU/FGP_dTV_core.c new file mode 100644 index 0000000..b182d46 --- /dev/null +++ b/Core/regularisers_CPU/FGP_dTV_core.c @@ -0,0 +1,440 @@ +/* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "FGP_dTV_core.h" + +/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case) + * which employs structural similarity of the level sets of two images/volumes, see [1,2] + * The current implementation updates image 1 while image 2 is kept fixed. + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED] + * 3. lambdaPar - regularization parameter [REQUIRED] + * 4. Number of iterations [OPTIONAL] + * 5. epsilon: tolerance constant [OPTIONAL] + * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] + * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL] + * 8. nonneg: nonnegativity constraint (0 is OFF by default) [OPTIONAL] + * 9. print information: 0 (off) or 1 (on) [OPTIONAL] + * + * Output: + * [1] Filtered/regularized image/volume + * + * This function is based on the MATLAB code and papers by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 
1084–1106 + */ + +float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int ll, j, DimTotal; + float re, re1; + float tk = 1.0f; + float tkp1=1.0f; + int count = 0; + + if (dimZ <= 1) { + /*2D case */ + float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL; + DimTotal = dimX*dimY; + + Output_prev = calloc(DimTotal, sizeof(float)); + P1 = calloc(DimTotal, sizeof(float)); + P2 = calloc(DimTotal, sizeof(float)); + P1_prev = calloc(DimTotal, sizeof(float)); + P2_prev = calloc(DimTotal, sizeof(float)); + R1 = calloc(DimTotal, sizeof(float)); + R2 = calloc(DimTotal, sizeof(float)); + InputRef_x = calloc(DimTotal, sizeof(float)); + InputRef_y = calloc(DimTotal, sizeof(float)); + + /* calculate gradient field (smoothed) for the reference image */ + GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, dimX, dimY); + + /* begin iterations */ + for(ll=0; ll 4) break; + + /*storing old values*/ + copyIm(Output, Output_prev, dimX, dimY, 1); + copyIm(P1, P1_prev, dimX, dimY, 1); + copyIm(P2, P2_prev, dimX, dimY, 1); + tk = tkp1; + } + if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y); + } + else { + /*3D case*/ + float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL; + DimTotal = dimX*dimY*dimZ; + + Output_prev = calloc(DimTotal, sizeof(float)); + P1 = calloc(DimTotal, sizeof(float)); + P2 = calloc(DimTotal, sizeof(float)); + P3 = calloc(DimTotal, sizeof(float)); + P1_prev = calloc(DimTotal, sizeof(float)); + P2_prev = calloc(DimTotal, sizeof(float)); + P3_prev = 
calloc(DimTotal, sizeof(float)); + R1 = calloc(DimTotal, sizeof(float)); + R2 = calloc(DimTotal, sizeof(float)); + R3 = calloc(DimTotal, sizeof(float)); + InputRef_x = calloc(DimTotal, sizeof(float)); + InputRef_y = calloc(DimTotal, sizeof(float)); + InputRef_z = calloc(DimTotal, sizeof(float)); + + /* calculate gradient field (smoothed) for the reference volume */ + GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ); + + /* begin iterations */ + for(ll=0; ll 4) break; + + /*storing old values*/ + copyIm(Output, Output_prev, dimX, dimY, dimZ); + copyIm(P1, P1_prev, dimX, dimY, dimZ); + copyIm(P2, P2_prev, dimX, dimY, dimZ); + copyIm(P3, P3_prev, dimX, dimY, dimZ); + tk = tkp1; + } + if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll); + free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); free(InputRef_x); free(InputRef_y); free(InputRef_z); + } + return *Output; +} + + +/********************************************************************/ +/***************************2D Functions*****************************/ +/********************************************************************/ + +float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, int dimX, int dimY) +{ + int i,j,index; + float val1, val2, gradX, gradY, magn; +#pragma omp parallel for shared(B, B_x, B_y) private(i,j,index,val1,val2,gradX,gradY,magn) + for(i=0; i 1.0f) { + sq_denom = 1.0f/sqrtf(denom); + P1[i] = P1[i]*sq_denom; + P2[i] = P2[i]*sq_denom; + } + } + } + else { + /* anisotropic TV*/ +#pragma omp parallel for shared(P1,P2) private(i,val1,val2) + for(i=0; i 1.0f) { + sq_denom = 1.0f/sqrtf(denom); + P1[i] = P1[i]*sq_denom; + P2[i] = P2[i]*sq_denom; + P3[i] = P3[i]*sq_denom; + } + } + } + else { + /* anisotropic TV*/ +#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3) + for(i=0; i +#include +#include +#include +#include +#include "omp.h" 
+#include "utils.h" +#include "CCPiDefines.h" + +/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case) + * which employs structural similarity of the level sets of two images/volumes, see [1,2] + * The current implementation updates image 1 while image 2 is kept fixed. + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED] + * 3. lambdaPar - regularization parameter [REQUIRED] + * 4. Number of iterations [OPTIONAL] + * 5. epsilon: tolerance constant [OPTIONAL] + * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] + * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL] + * 8. nonneg: nonnegativity constraint (0 is OFF by default) [OPTIONAL] + * 9. print information: 0 (off) or 1 (on) [OPTIONAL] + * + * Output: + * [1] Filtered/regularized image/volume + * + * This function is based on the MATLAB code and papers by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 
1084–1106 + */ + +#ifdef __cplusplus +extern "C" { +#endif +float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + +CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, int dimX, int dimY); +CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX, int dimY); +CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); +CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY); +CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, int DimTotal); +CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal); + +CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, int dimX, int dimY, int dimZ); +CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, int dimX, int dimY, int dimZ); +CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ); +CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ); +CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, int DimTotal); +CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal); +#ifdef __cplusplus +} +#endif diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu index 314a367..3fbbcde 100755 --- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu +++ 
b/Core/regularisers_GPU/TV_FGP_GPU_core.cu @@ -417,14 +417,14 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in checkCudaErrors(cudaPeekAtLastError() ); if (epsil != 0.0f) { - /* calculate norm - stopping rules using the Thrust library */ + /* calculate norm - stopping rules using the Thrust library */ ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); - float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); - thrust::device_vector d_vec2(d_update, d_update + ImSize); + thrust::device_vector d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus())); + thrust::device_vector d_vec2(d_update, d_update + ImSize); float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus())); re = (reduction/reduction2); diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu new file mode 100644 index 0000000..2b450e6 --- /dev/null +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu @@ -0,0 +1,751 @@ + /* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazantsev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "dTV_FGP_GPU_core.h" +#include +#include + +/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case) + * which employs structural similarity of the level sets of two images/volumes, see [1,2] + * The current implementation updates image 1 while image 2 is being fixed. + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED] + * 3. lambdaPar - regularization parameter [REQUIRED] + * 4. Number of iterations [OPTIONAL] + * 5. eplsilon: tolerance constant [OPTIONAL] + * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * + * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL] + * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL] + * 9. print information: 0 (off) or 1 (on) [OPTIONAL] + * + * Output: + * [1] Filtered/regularized image/volume + * + * This function is based on the Matlab's codes and papers by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106 + */ + + +// This will output the proper CUDA error strings in the event that a CUDA host call returns an error +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define BLKXSIZE2D 16 +#define BLKYSIZE2D 16 + +#define BLKXSIZE 8 +#define BLKYSIZE 8 +#define BLKZSIZE 8 + +#define idivup(a, b) ( ((a)%(b) != 0) ? 
(a)/(b)+1 : (a)/(b) ) +struct square { __host__ __device__ float operator()(float x) { return x * x; } }; + +/************************************************/ +/*****************2D modules*********************/ +/************************************************/ + +__global__ void GradNorm_func2D(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize) +{ + + float val1, val2, gradX, gradY, magn; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + /* boundary conditions */ + if (xIndex >= N-1) val1 = 0.0f; else val1 = Refd[(xIndex+1) + N*yIndex]; + if (yIndex >= M-1) val2 = 0.0f; else val2 = Refd[(xIndex) + N*(yIndex + 1)]; + + gradX = val1 - Refd[index]; + gradY = val2 - Refd[index]; + magn = pow(gradX,2) + pow(gradY,2); + magn = sqrt(magn + pow(eta,2)); + Refd_x[index] = gradX/magn; + Refd_y[index] = gradY/magn; + } + return; +} + +__global__ void ProjectVect_func2D(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize) +{ + + float in_prod; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + in_prod = R1[index]*Refd_x[index] + R2[index]*Refd_y[index]; /* calculate inner product */ + R1[index] = R1[index] - in_prod*Refd_x[index]; + R2[index] = R2[index] - in_prod*Refd_y[index]; + } + return; +} + + +__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda) +{ + + float val1,val2; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + if (xIndex <= 0) 
{val1 = 0.0f;} else {val1 = R1[(xIndex-1) + N*yIndex];} + if (yIndex <= 0) {val2 = 0.0f;} else {val2 = R2[xIndex + N*(yIndex-1)];} + + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] - val1 - val2); + } + return; +} + +__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip) +{ + + float val1,val2,in_prod; + + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + + /* boundary conditions */ + if (xIndex >= N-1) val1 = 0.0f; else val1 = D[index] - D[(xIndex+1) + N*yIndex]; + if (yIndex >= M-1) val2 = 0.0f; else val2 = D[index] - D[(xIndex) + N*(yIndex + 1)]; + + in_prod = val1*Refd_x[index] + val2*Refd_y[index]; /* calculate inner product */ + val1 = val1 - in_prod*Refd_x[index]; + val2 = val2 - in_prod*Refd_y[index]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + } + return; +} + +__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float denom; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + denom = pow(P1[index],2) + pow(P2[index],2); + if (denom > 1.0f) { + P1[index] = P1[index]/sqrt(denom); + P2[index] = P2[index]/sqrt(denom); + } + } + return; +} +__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize) +{ + + float val1, val2; + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && 
(yIndex < M)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + } + return; +} +__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize) +{ + //calculate each thread global index + const int xIndex=blockIdx.x*blockDim.x+threadIdx.x; + const int yIndex=blockIdx.y*blockDim.y+threadIdx.y; + + int index = xIndex + N*yIndex; + + if ((xIndex < N) && (yIndex < M)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + } + return; +} +__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} +__global__ void copy_kernel2D(float *Input, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +__global__ void ResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total) +{ + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + + int index = xIndex + N*yIndex; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} +/************************************************/ +/*****************3D modules*********************/ +/************************************************/ +__global__ void GradNorm_func3D(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize) +{ 
+ + float val1, val2, val3, gradX, gradY, gradZ, magn; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* boundary conditions */ + if (i >= N-1) val1 = 0.0f; else val1 = Refd[(N*M)*k + (i+1) + N*j]; + if (j >= M-1) val2 = 0.0f; else val2 = Refd[(N*M)*k + i + N*(j+1)]; + if (k >= Z-1) val3 = 0.0f; else val3 = Refd[(N*M)*(k+1) + i + N*j]; + + gradX = val1 - Refd[index]; + gradY = val2 - Refd[index]; + gradZ = val3 - Refd[index]; + magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2); + magn = sqrt(magn + pow(eta,2)); + Refd_x[index] = gradX/magn; + Refd_y[index] = gradY/magn; + Refd_z[index] = gradZ/magn; + } + return; +} + +__global__ void ProjectVect_func3D(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize) +{ + + float in_prod; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + in_prod = R1[index]*Refd_x[index] + R2[index]*Refd_y[index] + R3[index]*Refd_z[index]; /* calculate inner product */ + + R1[index] = R1[index] - in_prod*Refd_x[index]; + R2[index] = R2[index] - in_prod*Refd_y[index]; + R3[index] = R3[index] - in_prod*Refd_z[index]; + } + return; +} + + +__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda) +{ + + float val1,val2,val3; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + if (i 
<= 0) {val1 = 0.0f;} else {val1 = R1[(N*M)*(k) + (i-1) + N*j];} + if (j <= 0) {val2 = 0.0f;} else {val2 = R2[(N*M)*(k) + i + N*(j-1)];} + if (k <= 0) {val3 = 0.0f;} else {val3 = R3[(N*M)*(k-1) + i + N*j];} + //Write final result to global memory + D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - val1 - val2 - val3); + } + return; +} + +__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip) +{ + + float val1,val2,val3,in_prod; + + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + /* boundary conditions */ + if (i >= N-1) val1 = 0.0f; else val1 = D[index] - D[(N*M)*(k) + (i+1) + N*j]; + if (j >= M-1) val2 = 0.0f; else val2 = D[index] - D[(N*M)*(k) + i + N*(j+1)]; + if (k >= Z-1) val3 = 0.0f; else val3 = D[index] - D[(N*M)*(k+1) + i + N*j]; + + in_prod = val1*Refd_x[index] + val2*Refd_y[index] + val3*Refd_z[index]; /* calculate inner product */ + val1 = val1 - in_prod*Refd_x[index]; + val2 = val2 - in_prod*Refd_y[index]; + val3 = val3 - in_prod*Refd_z[index]; + + //Write final result to global memory + P1[index] = R1[index] + multip*val1; + P2[index] = R2[index] + multip*val2; + P3[index] = R3[index] + multip*val3; + } + return; +} + +__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float denom,sq_denom; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2); + + if 
(denom > 1.0f) { + sq_denom = 1.0f/sqrt(denom); + P1[index] = P1[index]*sq_denom; + P2[index] = P2[index]*sq_denom; + P3[index] = P3[index]*sq_denom; + } + } + return; +} + +__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize) +{ + + float val1, val2, val3; + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + val1 = abs(P1[index]); + val2 = abs(P2[index]); + val3 = abs(P3[index]); + if (val1 < 1.0f) {val1 = 1.0f;} + if (val2 < 1.0f) {val2 = 1.0f;} + if (val3 < 1.0f) {val3 = 1.0f;} + P1[index] = P1[index]/val1; + P2[index] = P2[index]/val2; + P3[index] = P3[index]/val3; + } + return; +} + + +__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize) +{ + //calculate each thread global index + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if ((i < N) && (j < M) && (k < Z)) { + R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]); + R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]); + R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]); + } + return; +} + +__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + if (Output[index] < 0.0f) Output[index] = 0.0f; + } +} + +__global__ void copy_kernel3D(float *Input, float* Output, int N, int M, int Z, int 
num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input[index]; + } +} +/* Element-wise residual: Output = Input1 - Input2 for every linear index below num_total */ +__global__ void ResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + int index = (N*M)*k + i + N*j; + + if (index < num_total) { + Output[index] = Input1[index] - Input2[index]; + } +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ + +/* MAIN HOST FUNCTION: FGP-dTV on the GPU. Copies Input and InputRef (host buffers of dimX*dimY*dimZ floats) to the device, runs at most iter FGP iterations (2D path when dimZ <= 1, 3D path otherwise) and copies the result into Output (host). lambdaPar - regularisation parameter; epsil - tolerance for the relative-change stopping test, 0 disables the test; eta - smoothing constant passed to the reference-gradient kernel; methodTV - 0 selects the isotropic projection, anything else the anisotropic one; nonneg - non-zero applies the nonnegativity kernel; printM - 1 prints the iteration index at which the loop stopped. Returns without touching Output when no CUDA device is available. Note: the CUDA device is reset before returning. */ +extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount <= 0) { /* <= 0 also covers a failed query that leaves the initial -1 in place */ + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int count = 0, i; + float re, multip,multip2; + float tk = 1.0f; + float tkp1=1.0f; + + if (dimZ <= 1) { + /*2D version*/ + int ImSize = dimX*dimY; + float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *d_InputRef=NULL; + + dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); + dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + if (epsil != 0.0f) { checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) ); } /* zeroed because the first residual reads it before any copy has written it */ + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) 
); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + cudaMemset(InputRef_x, 0, ImSize*sizeof(float)); + cudaMemset(InputRef_y, 0, ImSize*sizeof(float)); + + /******************** Run CUDA 2D kernel here ********************/ + multip = (1.0f/(8.0f*lambdaPar)); + /* calculate gradient vectors for the reference */ + GradNorm_func2D<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/ + ProjectVect_func2D<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* computing the gradient of the objective function */ + Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (nonneg != 0) { + 
nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library; P1_prev is used as scratch for the residual and is refreshed from P1 below */ + ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>())); + thrust::device_vector<float> d_vec2(d_update, d_update + ImSize); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + + copy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + } + + copy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, 
ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) ); + + cudaFree(d_input); + cudaFree(d_update); + if (epsil != 0.0f) cudaFree(d_update_prev); + cudaFree(P1); + cudaFree(P2); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(R1); + cudaFree(R2); + + cudaFree(d_InputRef); + cudaFree(InputRef_x); + cudaFree(InputRef_y); + } + else { + /*3D version*/ + int ImSize = dimX*dimY*dimZ; + float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL, *d_InputRef=NULL; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE)); + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) ); + if (epsil != 0.0f) { checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) ); checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) ); } /* zeroed because the first residual reads it before any copy has written it */ + checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) ); + 
checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&InputRef_z,ImSize*sizeof(float)) ); + + checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + + cudaMemset(P1, 0, ImSize*sizeof(float)); + cudaMemset(P2, 0, ImSize*sizeof(float)); + cudaMemset(P3, 0, ImSize*sizeof(float)); + cudaMemset(P1_prev, 0, ImSize*sizeof(float)); + cudaMemset(P2_prev, 0, ImSize*sizeof(float)); + cudaMemset(P3_prev, 0, ImSize*sizeof(float)); + cudaMemset(R1, 0, ImSize*sizeof(float)); + cudaMemset(R2, 0, ImSize*sizeof(float)); + cudaMemset(R3, 0, ImSize*sizeof(float)); + cudaMemset(InputRef_x, 0, ImSize*sizeof(float)); + cudaMemset(InputRef_y, 0, ImSize*sizeof(float)); + cudaMemset(InputRef_z, 0, ImSize*sizeof(float)); + + /********************** Run CUDA 3D kernel here ********************/ + multip = (1.0f/(26.0f*lambdaPar)); + /* calculate gradient vectors for the reference */ + GradNorm_func3D<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* The main kernel */ + for (i = 0; i < iter; i++) { + + /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/ + ProjectVect_func3D<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* computing the gradient of the objective function */ + Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() 
); + + if (nonneg != 0) { + nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); } + + /*Taking a step towards minus of the gradient*/ + Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + /* projection step */ + if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; + multip2 = ((tk-1.0f)/tkp1); + + Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + if (epsil != 0.0f) { + /* calculate norm - stopping rules using the Thrust library; P1_prev is used as scratch for the residual and is refreshed from P1 below */ + ResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); + float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>())); + thrust::device_vector<float> d_vec2(d_update, d_update + ImSize); + float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>())); + + re = (reduction/reduction2); + if (re < epsil) count++; + if (count > 4) break; + + copy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + } + + copy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + 
checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + copy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + checkCudaErrors( cudaDeviceSynchronize() ); + checkCudaErrors(cudaPeekAtLastError() ); + + tk = tkp1; + } + if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i); + /***************************************************************/ + //copy result matrix from device to host memory + checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) ); + + cudaFree(d_input); + cudaFree(d_update); + if (epsil != 0.0f) cudaFree(d_update_prev); + cudaFree(P1); + cudaFree(P2); + cudaFree(P3); + cudaFree(P1_prev); + cudaFree(P2_prev); + cudaFree(P3_prev); + cudaFree(R1); + cudaFree(R2); + cudaFree(R3); + cudaFree(InputRef_x); + cudaFree(InputRef_y); + cudaFree(InputRef_z); + cudaFree(d_InputRef); + } + cudaDeviceReset(); +} diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h new file mode 100644 index 0000000..b906636 --- /dev/null +++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h @@ -0,0 +1,10 @@ +#include +#include +#include + +#ifndef _dTV_FGP_GPU_ +#define _dTV_FGP_GPU_ + +extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); + +#endif diff --git a/Readme.md b/Readme.md index 056ade5..31d03a1 100644 --- a/Readme.md +++ b/Readme.md @@ -18,6 +18,7 @@ can also be used as image denoising iterative filters. The core modules are writ 2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D GPU/CPU]; (Ref. 2) ### Multi-channel +1. Fast-Gradient-Projection (FGP) Directional Total Variation [2D/3D GPU/CPU]; (Ref. 
4,2) ## Installation: @@ -43,6 +44,7 @@ can also be used as image denoising iterative filters. The core modules are writ 1. Rudin, L.I., Osher, S. and Fatemi, E., 1992. Nonlinear total variation based noise removal algorithms. Physica D: nonlinear phenomena, 60(1-4), pp.259-268. 2. Beck, A. and Teboulle, M., 2009. Fast gradient-based algorithms for constrained total variation image denoising and deblurring problems. IEEE Transactions on Image Processing, 18(11), pp.2419-2434. 3. Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590. +4. Ehrhardt, M.J. and Betcke, M.M., 2016. Multicontrast MRI reconstruction with structure-guided total variation. SIAM Journal on Imaging Sciences, 9(3), pp.1084-1106. ### License: [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m index 71082e7..dc49d9c 100644 --- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m @@ -1,5 +1,6 @@ % Volume (3D) denoising demo using CCPi-RGL - +clear +close all addpath('../mex_compile/installed'); addpath('../../../data/'); @@ -14,31 +15,65 @@ vol3D(vol3D < 0) = 0; figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image'); %% -fprintf('Denoise using ROF-TV model (CPU) \n'); +fprintf('Denoise a volume using the ROF-TV model (CPU) \n'); lambda_rof = 0.03; % regularisation parameter tau_rof = 0.0025; % time-marching constant iter_rof = 300; % number of ROF iterations tic; u_rof = ROF_TV(single(vol3D), lambda_rof, iter_rof, tau_rof); toc; figure; imshow(u_rof(:,:,15), [0 1]); title('ROF-TV denoised volume (CPU)'); %% -% fprintf('Denoise using ROF-TV model (GPU) \n'); +% fprintf('Denoise a volume using the ROF-TV model (GPU) \n'); % lambda_rof = 
0.03; % regularisation parameter % tau_rof = 0.0025; % time-marching constant % iter_rof = 300; % number of ROF iterations % tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_rof, iter_rof, tau_rof); toc; % figure; imshow(u_rofG(:,:,15), [0 1]); title('ROF-TV denoised volume (GPU)'); %% -fprintf('Denoise using FGP-TV model (CPU) \n'); +fprintf('Denoise a volume using the FGP-TV model (CPU) \n'); lambda_fgp = 0.03; % regularisation parameter iter_fgp = 300; % number of FGP iterations epsil_tol = 1.0e-05; % tolerance tic; u_fgp = FGP_TV(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; figure; imshow(u_fgp(:,:,15), [0 1]); title('FGP-TV denoised volume (CPU)'); %% -% fprintf('Denoise using FGP-TV model (GPU) \n'); +% fprintf('Denoise a volume using the FGP-TV model (GPU) \n'); % lambda_fgp = 0.03; % regularisation parameter % iter_fgp = 300; % number of FGP iterations % epsil_tol = 1.0e-05; % tolerance % tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; % figure; imshow(u_fgpG(:,:,15), [0 1]); title('FGP-TV denoised volume (GPU)'); %% +fprintf('Denoise a volume using the FGP-dTV model (CPU) \n'); + +% create another volume (reference) with slightly less amount of noise +vol3D_ref = zeros(N,N,slices, 'single'); +for i = 1:slices +vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); +end +vol3D_ref(vol3D_ref < 0) = 0; +% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) + +lambda_fgp = 0.03; % regularisation parameter +iter_fgp = 300; % number of FGP iterations +epsil_tol = 1.0e-05; % tolerance +eta = 0.2; % Reference image gradient smoothing constant +tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +figure; imshow(u_fgp_dtv(:,:,15), [0 1]); title('FGP-dTV denoised volume (CPU)'); +%% +fprintf('Denoise a volume using the FGP-dTV model (GPU) \n'); + +% create another volume (reference) with slightly less amount of noise +vol3D_ref = zeros(N,N,slices, 'single'); +for i = 
1:slices +vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); +end +vol3D_ref(vol3D_ref < 0) = 0; +% vol3D_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) + +lambda_fgp = 0.03; % regularisation parameter +iter_fgp = 300; % number of FGP iterations +epsil_tol = 1.0e-05; % tolerance +eta = 0.2; % Reference image gradient smoothing constant +tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +figure; imshow(u_fgp_dtv_g(:,:,15), [0 1]); title('FGP-dTV denoised volume (GPU)'); +%% \ No newline at end of file diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m index 7f87fbb..145f2ff 100644 --- a/Wrappers/Matlab/demos/demoMatlab_denoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m @@ -1,5 +1,6 @@ % Image (2D) denoising demo using CCPi-RGL - +clear +close all addpath('../mex_compile/installed'); addpath('../../../data/'); @@ -8,31 +9,55 @@ u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; figure; imshow(u0, [0 1]); title('Noisy image'); %% -fprintf('Denoise using ROF-TV model (CPU) \n'); +fprintf('Denoise using the ROF-TV model (CPU) \n'); lambda_rof = 0.03; % regularisation parameter tau_rof = 0.0025; % time-marching constant iter_rof = 2000; % number of ROF iterations tic; u_rof = ROF_TV(single(u0), lambda_rof, iter_rof, tau_rof); toc; figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)'); %% -% fprintf('Denoise using ROF-TV model (GPU) \n'); +% fprintf('Denoise using the ROF-TV model (GPU) \n'); % lambda_rof = 0.03; % regularisation parameter % tau_rof = 0.0025; % time-marching constant % iter_rof = 2000; % number of ROF iterations % tic; u_rofG = ROF_TV_GPU(single(u0), lambda_rof, iter_rof, tau_rof); toc; % figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)'); %% -fprintf('Denoise using FGP-TV model (CPU) \n'); +fprintf('Denoise using the FGP-TV model (CPU) \n'); lambda_fgp = 0.03; % regularisation parameter iter_fgp = 
1000; % number of FGP iterations -epsil_tol = 1.0e-05; % tolerance +epsil_tol = 1.0e-06; % tolerance tic; u_fgp = FGP_TV(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)'); %% -% fprintf('Denoise using FGP-TV model (GPU) \n'); +% fprintf('Denoise using the FGP-TV model (GPU) \n'); % lambda_fgp = 0.03; % regularisation parameter % iter_fgp = 1000; % number of FGP iterations % epsil_tol = 1.0e-05; % tolerance % tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; % figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)'); %% +fprintf('Denoise using the FGP-dTV model (CPU) \n'); +% create another image (reference) with slightly less amount of noise +u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0; +% u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) + +lambda_fgp = 0.03; % regularisation parameter +iter_fgp = 1000; % number of FGP iterations +epsil_tol = 1.0e-06; % tolerance +eta = 0.2; % Reference image gradient smoothing constant +tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)'); +%% +% fprintf('Denoise using the FGP-dTV model (GPU) \n'); +% % create another image (reference) with slightly less amount of noise +% u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0; +% % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV) +% +% lambda_fgp = 0.03; % regularisation parameter +% iter_fgp = 1000; % number of FGP iterations +% epsil_tol = 1.0e-06; % tolerance +% eta = 0.2; % Reference image gradient smoothing constant +% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; +% figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)'); +%% diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex.m b/Wrappers/Matlab/mex_compile/compileCPU_mex.m index 
8da81ad..71f345a 100644 --- a/Wrappers/Matlab/mex_compile/compileCPU_mex.m +++ b/Wrappers/Matlab/mex_compile/compileCPU_mex.m @@ -11,7 +11,10 @@ movefile ROF_TV.mex* ../installed/ mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" movefile FGP_TV.mex* ../installed/ -delete ROF_TV_core* FGP_TV_core* utils.c utils.h CCPiDefines.h +mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +movefile FGP_dTV.mex* ../installed/ + +delete ROF_TV_core* FGP_TV_core* FGP_dTV_core* utils* CCPiDefines.h fprintf('%s \n', 'All successfully compiled!'); diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m index 45236fa..f58e9bc 100644 --- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m +++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m @@ -23,7 +23,11 @@ movefile ROF_TV_GPU.mex* ../installed/ mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o movefile FGP_TV_GPU.mex* ../installed/ -delete TV_ROF_GPU_core* TV_FGP_GPU_core* CCPiDefines.h +!/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/ +mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o +movefile FGP_dTV_GPU.mex* ../installed/ + +delete TV_ROF_GPU_core* TV_FGP_GPU_core* dTV_FGP_GPU_core* CCPiDefines.h fprintf('%s \n', 'All successfully compiled!'); cd ../../ diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c index ba06cc7..aae1cb7 100644 --- a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c +++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c @@ -52,7 +52,7 @@ void mexFunction( dim_array = mxGetDimensions(prhs[0]); /*Handling Matlab input data*/ - if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At 
least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch"); + if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch"); Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~ b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~ deleted file mode 100644 index 30d61cd..0000000 --- a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~ +++ /dev/null @@ -1,91 +0,0 @@ -/* - * This work is part of the Core Imaging Library developed by - * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC - * - * Copyright 2017 Daniil Kazantsev - * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0 - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "matrix.h" -#include "mex.h" -#include "FGP_TV_core.h" - -/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) - * - * Input Parameters: - * 1. Noisy image/volume - * 2. lambdaPar - regularization parameter - * 3. 
Number of iterations - * 4. eplsilon: tolerance constant - * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1) - * 6. nonneg: 'nonnegativity (0 is OFF by default) - * 7. print information: 0 (off) or 1 (on) - * - * Output: - * [1] Filtered/regularized image - * - * This function is based on the Matlab's code and paper by - * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" - */ - - -void mexFunction( - int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[]) - -{ - int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch; - const int *dim_array; - float *Input, *Output, lambda, epsil; - - number_of_dims = mxGetNumberOfDimensions(prhs[0]); - dim_array = mxGetDimensions(prhs[0]); - - /*Handling Matlab input data*/ - if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch"); - - Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ - lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */ - iter = 300; /* default iterations number */ - epsil = 0.0001; /* default tolerance constant */ - methTV = 0; /* default isotropic TV penalty */ - printswitch = 0; /*default print is switched off - 0 */ - - if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } - - if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6)) iter = (int) mxGetScalar(prhs[2]); /* iterations number */ - if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6)) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */ - if ((nrhs == 5) || (nrhs == 6)) { - char *penalty_type; - penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ - if ((strcmp(penalty_type, "l1") != 0) && 
(strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); - if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ - mxFree(penalty_type); - } - if (nrhs == 6) { - printswitch = (int) mxGetScalar(prhs[5]); - if ((printswitch != 0) || (printswitch != 1)) {mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); } - } - - /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - - if (number_of_dims == 2) { - dimZ = 1; /*2D case*/ - Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - } - if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - - - TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ) -} diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c new file mode 100644 index 0000000..bb868c7 --- /dev/null +++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c @@ -0,0 +1,113 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "matrix.h" +#include "mex.h" +#include "FGP_dTV_core.h" + +/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case) + * which employs structural similarity of the level sets of two images/volumes, see [1,2] + * The current implementation updates image 1 while image 2 is being fixed. + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED] + * 3. lambdaPar - regularization parameter [REQUIRED] + * 4. Number of iterations [OPTIONAL] + * 5. eplsilon: tolerance constant [OPTIONAL] + * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * + * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL] + * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL] + * 9. print information: 0 (off) or 1 (on) [OPTIONAL] + * + * Output: + * [1] Filtered/regularized image/volume + * + * This function is based on the Matlab's codes and papers by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 
1084–1106 + */ + + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) + +{ + int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch, nonneg; + const mwSize *dim_array; /* mxGetDimensions returns const mwSize*, which is not int on large-array builds */ + const mwSize *dim_array2; + float *Input, *InputRef, *Output=NULL, lambda, epsil, eta; + + /*Handling Matlab input data: the argument count is validated first so that prhs[0..2] are never read when absent*/ + if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch"); + + number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]); + dim_array = mxGetDimensions(prhs[0]); + dim_array2 = mxGetDimensions(prhs[1]); + + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + InputRef = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */ + lambda = (float) mxGetScalar(prhs[2]); /* regularization parameter */ + iter = 300; /* default iterations number */ + epsil = 0.0001; /* default tolerance constant */ + eta = 0.01; /* default smoothing constant */ + methTV = 0; /* default isotropic TV penalty */ + nonneg = 0; /* default nonnegativity switch, off - 0 */ + printswitch = 0; /*default print is switched, off - 0 */ + if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume"); /* otherwise Output would stay NULL below */ + if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("The input and the reference must have the same number of dimensions"); /* keeps the dim_array2 reads below in bounds */ + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The reference image must be in a single precision"); } + + /*Handling Matlab output data*/ + dimX = (int) dim_array[0]; dimY = (int) dim_array[1]; dimZ = (number_of_dims == 3) ? (int) dim_array[2] : 1; /* a 2D array has no third entry to read */ + if (number_of_dims == 2) { if ((dim_array[0] != dim_array2[0]) || (dim_array[1] != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");} + if (number_of_dims == 3) { if ((dim_array[0] != dim_array2[0]) || (dim_array[1] != dim_array2[1]) || (dim_array[2] != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");} + + + if (nrhs >= 4)
iter = (int) mxGetScalar(prhs[3]); /* iterations number */ + if (nrhs >= 5) epsil = (float) mxGetScalar(prhs[4]); /* tolerance constant */ + if (nrhs >= 6) { + eta = (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */ + } + if (nrhs >= 7) { + char *penalty_type; + penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); /* NULL means the argument could not be converted to a string */ + if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + mxFree(penalty_type); + } + if (nrhs >= 8) { + nonneg = (int) mxGetScalar(prhs[7]); + if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0"); + } + if (nrhs == 9) { + printswitch = (int) mxGetScalar(prhs[8]); + if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); + } + + if (number_of_dims == 2) { + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + dTV_FGP_CPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ); +} \ No newline at end of file diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp new file mode 100644 index 0000000..5b80616 --- /dev/null +++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp @@ -0,0 +1,111 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual 
Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "matrix.h" +#include "mex.h" +#include "dTV_FGP_GPU_core.h" + +/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case) + * which employs structural similarity of the level sets of two images/volumes, see [1,2] + * The current implementation updates image 1 while image 2 is being fixed. + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED] + * 3. lambdaPar - regularization parameter [REQUIRED] + * 4. Number of iterations [OPTIONAL] + * 5. eplsilon: tolerance constant [OPTIONAL] + * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL] * + * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL] + * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL] + * 9. print information: 0 (off) or 1 (on) [OPTIONAL] + * + * Output: + * [1] Filtered/regularized image/volume + * + * This function is based on the Matlab's codes and papers by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + * [2] M. J. Ehrhardt and M. M. 
Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106 + */ +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) + +{ + int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch, nonneg; + const mwSize *dim_array; + const mwSize *dim_array2; + float *Input, *InputRef, *Output=NULL, lambda, epsil, eta; + + /*Handling Matlab input data (the argument count is validated before any prhs[] element is touched)*/ + if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch"); + + number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]); + dim_array = mxGetDimensions(prhs[0]); + dim_array2 = mxGetDimensions(prhs[1]); + + Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */ + InputRef = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */ + lambda = (float) mxGetScalar(prhs[2]); /* regularization parameter */ + iter = 300; /* default iterations number */ + epsil = 0.0001; /* default tolerance constant */ + eta = 0.01; /* default smoothing constant */ + methTV = 0; /* default isotropic TV penalty */ + nonneg = 0; /* default nonnegativity switch, off - 0 */ + printswitch = 0; /*default print is switched, off - 0 */ + + + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } + + /*Handling Matlab output data*/ + dimX = (int) dim_array[0]; dimY = (int) dim_array[1]; dimZ = (number_of_dims > 2) ? (int) dim_array[2] : 1; /* a 2D mxArray exposes only two extents, so index 2 must not be read */ + if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");} + if (number_of_dims == 3) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The
input volumes have different dimensionalities");} + + + if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9)) iter = (int) mxGetScalar(prhs[3]); /* iterations number */ + if ((nrhs == 5) || (nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9)) epsil = (float) mxGetScalar(prhs[4]); /* tolerance constant */ + if ((nrhs == 6) || (nrhs == 7) || (nrhs == 8) || (nrhs == 9)) { + eta = (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */ + } + if ((nrhs == 7) || (nrhs == 8) || (nrhs == 9)) { + char *penalty_type; + penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */ + if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',"); + if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */ + mxFree(penalty_type); + } + if ((nrhs == 8) || (nrhs == 9)) { + nonneg = (int) mxGetScalar(prhs[7]); + if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0"); + } + if (nrhs == 9) { + printswitch = (int) mxGetScalar(prhs[8]); + if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); + } + + if (number_of_dims == 2) { + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + } + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + dTV_FGP_GPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ); +} \ No newline at end of file diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py index 039daab..c6723fa 100644 --- a/Wrappers/Python/ccpi/filters/regularisers.py +++ b/Wrappers/Python/ccpi/filters/regularisers.py @@ 
-2,8 +2,8 @@ script which assigns a proper device core function based on a flag ('cpu' or 'gpu') """ -from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU -from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU +from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU +from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU def ROF_TV(inputData, regularisation_parameter, iterations, time_marching_parameter,device='cpu'): @@ -42,3 +42,28 @@ def FGP_TV(inputData, regularisation_parameter,iterations, else: raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ .format(device)) +def FGP_dTV(inputData, refdata, regularisation_parameter, iterations, + tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'): + if device == 'cpu': + return dTV_FGP_CPU(inputData, + refdata, + regularisation_parameter, + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM) + elif device == 'gpu': + return dTV_FGP_GPU(inputData, + refdata, + regularisation_parameter, + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM) + else: + raise ValueError('Unknown device {0}. Expecting gpu or cpu'\ + .format(device)) diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py new file mode 100644 index 0000000..04bbd40 --- /dev/null +++ b/Wrappers/Python/conda-recipe/run_test.py @@ -0,0 +1,149 @@ +import unittest +import numpy as np +import os +from ccpi.filters.regularisers import ROF_TV, FGP_TV +import matplotlib.pyplot as plt + +def rmse(im1, im2): + rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size)) + return rmse + +class TestRegularisers(unittest.TestCase): + + def setUp(self): + pass + + def test_cpu_regularisers(self): + filename = os.path.join(".." , ".." , ".."
, "data" ,"lena_gray_512.tif") + + # read noiseless image + Im = plt.imread(filename) + Im = np.asarray(Im, dtype='float32') + + Im = Im/255 + tolerance = 1e-05 + rms_rof_exp = 0.006812507 #expected value for ROF model + rms_fgp_exp = 0.019152347 #expected value for FGP model + + # set parameters for ROF-TV + pars_rof_tv = {'algorithm': ROF_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04,\ + 'number_of_iterations': 50,\ + 'time_marching_parameter': 0.0025 + } + # set parameters for FGP-TV + pars_fgp_tv = {'algorithm' : FGP_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :50 ,\ + 'tolerance_constant':1e-08,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing ROF-TV (2D, CPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + res = True + rof_cpu = ROF_TV(pars_rof_tv['input'], + pars_rof_tv['regularisation_parameter'], + pars_rof_tv['number_of_iterations'], + pars_rof_tv['time_marching_parameter'],'cpu') + rms_rof = rmse(Im, rof_cpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) + """ + if abs(rms_rof-self.rms_rof_exp) > self.tolerance: + raise TypeError('ROF-TV (2D, CPU) test FAILED') + else: + print ("test PASSED") + """ + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing FGP-TV (2D, CPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + fgp_cpu = FGP_TV(pars_fgp_tv['input'], + pars_fgp_tv['regularisation_parameter'], + pars_fgp_tv['number_of_iterations'], + pars_fgp_tv['tolerance_constant'], + pars_fgp_tv['methodTV'], + pars_fgp_tv['nonneg'], + pars_fgp_tv['printingOut'],'cpu') + rms_fgp = rmse(Im, fgp_cpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) + """ + if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: + raise TypeError('FGP-TV 
(2D, CPU) test FAILED') + else: + print ("test PASSED") + """ + self.assertTrue(res) + def test_gpu_regularisers(self): + filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") + + # read noiseless image + Im = plt.imread(filename) + Im = np.asarray(Im, dtype='float32') + + Im = Im/255 + tolerance = 1e-05 + rms_rof_exp = 0.006812507 #expected value for ROF model + rms_fgp_exp = 0.019152347 #expected value for FGP model + + # set parameters for ROF-TV + pars_rof_tv = {'algorithm': ROF_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04,\ + 'number_of_iterations': 50,\ + 'time_marching_parameter': 0.0025 + } + # set parameters for FGP-TV + pars_fgp_tv = {'algorithm' : FGP_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :50 ,\ + 'tolerance_constant':1e-08,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing ROF-TV (2D, GPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + res = True + rof_gpu = ROF_TV(pars_rof_tv['input'], + pars_rof_tv['regularisation_parameter'], + pars_rof_tv['number_of_iterations'], + pars_rof_tv['time_marching_parameter'],'gpu') + rms_rof = rmse(Im, rof_gpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) + """ + if abs(rms_rof-self.rms_rof_exp) > self.tolerance: + raise TypeError('ROF-TV (2D, GPU) test FAILED') + else: + print ("test PASSED") + """ + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing FGP-TV (2D, GPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + fgp_gpu = FGP_TV(pars_fgp_tv['input'], + pars_fgp_tv['regularisation_parameter'], + pars_fgp_tv['number_of_iterations'], + pars_fgp_tv['tolerance_constant'], + pars_fgp_tv['methodTV'], + pars_fgp_tv['nonneg'], + pars_fgp_tv['printingOut'],'gpu') + rms_fgp = rmse(Im, fgp_gpu) + # now compare obtained rms 
with the expected value + self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) + """ + if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: + raise TypeError('FGP-TV (2D, GPU) test FAILED') + else: + print ("test PASSED") + """ + self.assertTrue(res) +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py index 929f0af..fd3050c 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV +from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -39,9 +39,14 @@ perc = 0.05 u0 = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im)) +u_ref = Im + np.random.normal(loc = 0 , + scale = 0.01 * Im , + size = np.shape(Im)) + # map the u0 u0->u0>0 # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1) u0 = u0.astype('float32') +u_ref = u_ref.astype('float32') print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") @@ -134,9 +139,64 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(fgp_cpu, cmap="gray") plt.title('{}'.format('CPU results')) + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_____________FGP-dTV (2D)__________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(3) +plt.suptitle('Performance of FGP-dTV regulariser using the CPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_dTV, \ + 'input' : u0,\ + 'refdata' : u_ref,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :2000 ,\ + 
'tolerance_constant':1e-06,\ + 'eta_const':0.2,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +print ("#############FGP dTV CPU####################") +start_time = timeit.default_timer() +fgp_dtv_cpu = FGP_dTV(pars['input'], + pars['refdata'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['eta_const'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'cpu') + +rms = rmse(Im, fgp_dtv_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + + + # Uncomment to test 3D regularisation performance #%% -""" + N = 512 slices = 20 @@ -148,10 +208,12 @@ Im = Im/255 perc = 0.05 noisyVol = np.zeros((slices,N,N),dtype='float32') +noisyRef = np.zeros((slices,N,N),dtype='float32') idealVol = np.zeros((slices,N,N),dtype='float32') for i in range (slices): noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im)) + noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im)) idealVol[i,:,:] = Im print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") @@ -159,7 +221,7 @@ print ("_______________ROF-TV (3D)_________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(3) +fig = plt.figure(4) plt.suptitle('Performance of ROF-TV regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy 15th slice of a volume') @@ -199,7 +261,7 @@ print ("_______________FGP-TV (3D)__________________") print 
("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(4) +fig = plt.figure(5) plt.suptitle('Performance of FGP-TV regulariser using the CPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') @@ -242,5 +304,58 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the CPU using FGP-TV')) -""" -#%% \ No newline at end of file + + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________FGP-dTV (3D)__________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(6) +plt.suptitle('Performance of FGP-dTV regulariser using the CPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_dTV, \ + 'input' : noisyVol,\ + 'refdata' : noisyRef,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :300 ,\ + 'tolerance_constant':0.00001,\ + 'eta_const':0.2,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +print ("#############FGP dTV CPU####################") +start_time = timeit.default_timer() +fgp_dTV_cpu3D = FGP_dTV(pars['input'], + pars['refdata'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['eta_const'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'cpu') + + +rms = rmse(idealVol, fgp_dTV_cpu3D) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = 
plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray") +plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV')) +#%% diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index cfe2e7d..aa1f865 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV +from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -39,10 +39,14 @@ perc = 0.05 u0 = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im)) +u_ref = Im + np.random.normal(loc = 0 , + scale = 0.01 * Im , + size = np.shape(Im)) + # map the u0 u0->u0>0 # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1) u0 = u0.astype('float32') - +u_ref = u_ref.astype('float32') print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("____________ROF-TV bench___________________") @@ -213,3 +217,96 @@ else: print ("Arrays match") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________FGP-dTV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(3) +plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations') +a=fig.add_subplot(1,4,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_dTV, \ + 'input' : u0,\ + 'refdata' : u_ref,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :2000 ,\ + 'tolerance_constant':1e-06,\ + 'eta_const':0.2,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +print ("#############FGP dTV CPU####################") +start_time = timeit.default_timer() 
+fgp_dtv_cpu = FGP_dTV(pars['input'], + pars['refdata'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['eta_const'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'cpu') + + +rms = rmse(Im, fgp_dtv_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) + +print ("##############FGP dTV GPU##################") +start_time = timeit.default_timer() +fgp_dtv_gpu = FGP_dTV(pars['input'], + pars['refdata'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['eta_const'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') +rms = rmse(Im, fgp_dtv_gpu) +pars['rmse'] = rms +pars['algorithm'] = FGP_dTV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,4,3) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + + +print ("--------Compare the results--------") +tolerance = 1e-05 +diff_im = np.zeros(np.shape(rof_cpu)) +diff_im = abs(fgp_dtv_cpu - fgp_dtv_gpu) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,4,4) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, 
cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py index c496e1c..4759cc3 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers.py @@ -12,7 +12,7 @@ import matplotlib.pyplot as plt import numpy as np import os import timeit -from ccpi.filters.regularisers import ROF_TV, FGP_TV +from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -39,10 +39,13 @@ perc = 0.05 u0 = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im)) +u_ref = Im + np.random.normal(loc = 0 , + scale = 0.01 * Im , + size = np.shape(Im)) # map the u0 u0->u0>0 # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1) u0 = u0.astype('float32') - +u_ref = u_ref.astype('float32') print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("____________ROF-TV bench___________________") @@ -134,10 +137,62 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, imgplot = plt.imshow(fgp_gpu, cmap="gray") plt.title('{}'.format('GPU results')) +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____________FGP-dTV bench___________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(3) +plt.suptitle('Performance of the FGP-dTV regulariser using the GPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_dTV, \ + 'input' : u0,\ + 'refdata' : u_ref,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :2000 ,\ + 'tolerance_constant':1e-06,\ + 'eta_const':0.2,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 
'printingOut': 0 + } + +print ("##############FGP dTV GPU##################") +start_time = timeit.default_timer() +fgp_dtv_gpu = FGP_dTV(pars['input'], + pars['refdata'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['eta_const'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') + +rms = rmse(Im, fgp_dtv_gpu) +pars['rmse'] = rms +pars['algorithm'] = FGP_dTV +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray") +plt.title('{}'.format('GPU results')) + # Uncomment to test 3D regularisation performance #%% -""" + N = 512 slices = 20 @@ -149,10 +204,12 @@ Im = Im/255 perc = 0.05 noisyVol = np.zeros((slices,N,N),dtype='float32') +noisyRef = np.zeros((slices,N,N),dtype='float32') idealVol = np.zeros((slices,N,N),dtype='float32') for i in range (slices): noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im)) + noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im)) idealVol[i,:,:] = Im print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") @@ -160,7 +217,7 @@ print ("_______________ROF-TV (3D)_________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(3) +fig = plt.figure(4) plt.suptitle('Performance of ROF-TV regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy 15th slice of a volume') @@ -200,7 +257,7 @@ print ("_______________FGP-TV (3D)__________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") ## plot -fig = plt.figure(4) +fig = 
plt.figure(5) plt.suptitle('Performance of FGP-TV regulariser using the GPU') a=fig.add_subplot(1,2,1) a.set_title('Noisy Image') @@ -242,6 +299,58 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the GPU using FGP-TV')) -#%% -""" + +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("_______________FGP-dTV (3D)________________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure(6) +plt.suptitle('Performance of FGP-dTV regulariser using the GPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") + +# set parameters +pars = {'algorithm' : FGP_dTV, \ + 'input' : noisyVol,\ + 'refdata' : noisyRef,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :300 ,\ + 'tolerance_constant':0.00001,\ + 'eta_const':0.2,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + +print ("#############FGP TV GPU####################") +start_time = timeit.default_timer() +fgp_dTV_gpu3D = FGP_dTV(pars['input'], + pars['refdata'], + pars['regularisation_parameter'], + pars['number_of_iterations'], + pars['tolerance_constant'], + pars['eta_const'], + pars['methodTV'], + pars['nonneg'], + pars['printingOut'],'gpu') + +rms = rmse(idealVol, fgp_dTV_gpu3D) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray") +plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV')) + +#%% diff 
--git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in index a1c1ab6..c7ebb5c 100644 --- a/Wrappers/Python/setup-regularisers.py.in +++ b/Wrappers/Python/setup-regularisers.py.in @@ -36,6 +36,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"), os.path.join(".." , ".." , "Core", "regularisers_CPU"), os.path.join(".." , ".." , "Core", "regularisers_GPU" , "TV_FGP" ) , os.path.join(".." , ".." , "Core", "regularisers_GPU" , "TV_ROF" ) , + os.path.join(".." , ".." , "Core", "regularisers_GPU" , "dTV_FGP" ) , "."] if platform.system() == 'Windows': diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx index 0f08f7f..8f9185a 100644 --- a/Wrappers/Python/src/cpu_regularisers.pyx +++ b/Wrappers/Python/src/cpu_regularisers.pyx @@ -20,6 +20,7 @@ cimport numpy as np cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ); cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); +cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ); #****************************************************************# @@ -89,7 +90,7 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') - #/* Run ROF iterations for 2D data */ + #/* Run FGP-TV iterations for 2D data */ TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, iterationsNumb, tolerance_param, @@ -115,7 +116,7 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ 
np.zeros([dims[0], dims[1], dims[2]], dtype='float32') - #/* Run ROF iterations for 3D data */ + #/* Run FGP-TV iterations for 3D data */ TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, iterationsNumb, tolerance_param, @@ -124,3 +125,69 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, printM, dims[2], dims[1], dims[0]) return outputData +#****************************************************************# +#**************Directional Total-variation FGP ******************# +#****************************************************************# +#******** Directional TV Fast-Gradient-Projection (FGP)*********# +def dTV_FGP_CPU(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM): + if inputData.ndim == 2: + return dTV_FGP_2D(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM) + elif inputData.ndim == 3: + return dTV_FGP_3D(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM) + +def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + np.ndarray[np.float32_t, ndim=2, mode="c"] refdata, + float regularisation_parameter, + int iterationsNumb, + float tolerance_param, + float eta_const, + int methodTV, + int nonneg, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + #/* Run FGP-dTV iterations for 2D data; argument order follows the C prototype (..., epsil, eta, methodTV, ...) */ + dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter, + iterationsNumb, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1) + + return outputData + +def dTV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, + float
regularisation_parameter, + int iterationsNumb, + float tolerance_param, + float eta_const, + int methodTV, + int nonneg, + int printM): + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0], dims[1], dims[2]], dtype='float32') + + #/* Run FGP-dTV iterations for 3D data */ + dTV_FGP_CPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], regularisation_parameter, + iterationsNumb, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM, + dims[2], dims[1], dims[0]) + return outputData diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index ea746d3..4a14f69 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -20,6 +20,7 @@ cimport numpy as np cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); # Total-variation Rudin-Osher-Fatemi (ROF) def TV_ROF_GPU(inputData, @@ -61,7 +62,36 @@ def TV_FGP_GPU(inputData, methodTV, nonneg, printM) - +# Directional Total-variation Fast-Gradient-Projection (FGP) +def dTV_FGP_GPU(inputData, + refdata, + regularisation_parameter, + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM): + if inputData.ndim == 2: + return FGPdTV2D(inputData, + refdata, + regularisation_parameter, + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM) + elif inputData.ndim == 3: + return FGPdTV3D(inputData, + refdata, +
regularisation_parameter, + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM) #****************************************************************# #********************** Total-variation ROF *********************# #****************************************************************# @@ -167,4 +197,68 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, printM, dims[2], dims[1], dims[0]); - return outputData + return outputData + +#****************************************************************# +#**************Directional Total-variation FGP ******************# +#****************************************************************# +#******** Directional TV Fast-Gradient-Projection (FGP)*********# +def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + np.ndarray[np.float32_t, ndim=2, mode="c"] refdata, + float regularisation_parameter, + int iterations, + float tolerance_param, + float eta_const, + int methodTV, + int nonneg, + int printM): + + cdef long dims[2] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ + np.zeros([dims[0],dims[1]], dtype='float32') + + # Running CUDA code here (inputData and refdata are both 2D buffers of the same shape) + dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], + regularisation_parameter, + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM, + dims[0], dims[1], 1); + + return outputData + +def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, + np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, + float regularisation_parameter, + int iterations, + float tolerance_param, + float eta_const, + int methodTV, + int nonneg, + int printM): + + cdef long dims[3] + dims[0] = inputData.shape[0] + dims[1] = inputData.shape[1] + dims[2] = inputData.shape[2] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ + np.zeros([dims[0],dims[1],dims[2]], dtype='float32') + + # Running CUDA code here +
dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], + regularisation_parameter , + iterations, + tolerance_param, + eta_const, + methodTV, + nonneg, + printM, + dims[2], dims[1], dims[0]); + return outputData diff --git a/Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc b/Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc deleted file mode 100644 index 2196a53..0000000 Binary files a/Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc and /dev/null differ diff --git a/Wrappers/Python/test/run_test.py b/Wrappers/Python/test/run_test.py deleted file mode 100644 index 04bbd40..0000000 --- a/Wrappers/Python/test/run_test.py +++ /dev/null @@ -1,149 +0,0 @@ -import unittest -import numpy as np -import os -from ccpi.filters.regularisers import ROF_TV, FGP_TV -import matplotlib.pyplot as plt - -def rmse(im1, im2): - rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size)) - return rmse - -class TestRegularisers(unittest.TestCase): - - def setUp(self): - pass - - def test_cpu_regularisers(self): - filename = os.path.join(".." , ".." , ".." 
, "data" ,"lena_gray_512.tif") - - # read noiseless image - Im = plt.imread(filename) - Im = np.asarray(Im, dtype='float32') - - Im = Im/255 - tolerance = 1e-05 - rms_rof_exp = 0.006812507 #expected value for ROF model - rms_fgp_exp = 0.019152347 #expected value for FGP model - - # set parameters for ROF-TV - pars_rof_tv = {'algorithm': ROF_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04,\ - 'number_of_iterations': 50,\ - 'time_marching_parameter': 0.0025 - } - # set parameters for FGP-TV - pars_fgp_tv = {'algorithm' : FGP_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04, \ - 'number_of_iterations' :50 ,\ - 'tolerance_constant':1e-08,\ - 'methodTV': 0 ,\ - 'nonneg': 0 ,\ - 'printingOut': 0 - } - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing ROF-TV (2D, CPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - res = True - rof_cpu = ROF_TV(pars_rof_tv['input'], - pars_rof_tv['regularisation_parameter'], - pars_rof_tv['number_of_iterations'], - pars_rof_tv['time_marching_parameter'],'cpu') - rms_rof = rmse(Im, rof_cpu) - # now compare obtained rms with the expected value - self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) - """ - if abs(rms_rof-self.rms_rof_exp) > self.tolerance: - raise TypeError('ROF-TV (2D, CPU) test FAILED') - else: - print ("test PASSED") - """ - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing FGP-TV (2D, CPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - fgp_cpu = FGP_TV(pars_fgp_tv['input'], - pars_fgp_tv['regularisation_parameter'], - pars_fgp_tv['number_of_iterations'], - pars_fgp_tv['tolerance_constant'], - pars_fgp_tv['methodTV'], - pars_fgp_tv['nonneg'], - pars_fgp_tv['printingOut'],'cpu') - rms_fgp = rmse(Im, fgp_cpu) - # now compare obtained rms with the expected value - self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) - """ - if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: - raise TypeError('FGP-TV 
(2D, CPU) test FAILED') - else: - print ("test PASSED") - """ - self.assertTrue(res) - def test_gpu_regularisers(self): - filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") - - # read noiseless image - Im = plt.imread(filename) - Im = np.asarray(Im, dtype='float32') - - Im = Im/255 - tolerance = 1e-05 - rms_rof_exp = 0.006812507 #expected value for ROF model - rms_fgp_exp = 0.019152347 #expected value for FGP model - - # set parameters for ROF-TV - pars_rof_tv = {'algorithm': ROF_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04,\ - 'number_of_iterations': 50,\ - 'time_marching_parameter': 0.0025 - } - # set parameters for FGP-TV - pars_fgp_tv = {'algorithm' : FGP_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04, \ - 'number_of_iterations' :50 ,\ - 'tolerance_constant':1e-08,\ - 'methodTV': 0 ,\ - 'nonneg': 0 ,\ - 'printingOut': 0 - } - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing ROF-TV (2D, GPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - res = True - rof_gpu = ROF_TV(pars_rof_tv['input'], - pars_rof_tv['regularisation_parameter'], - pars_rof_tv['number_of_iterations'], - pars_rof_tv['time_marching_parameter'],'gpu') - rms_rof = rmse(Im, rof_gpu) - # now compare obtained rms with the expected value - self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) - """ - if abs(rms_rof-self.rms_rof_exp) > self.tolerance: - raise TypeError('ROF-TV (2D, GPU) test FAILED') - else: - print ("test PASSED") - """ - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing FGP-TV (2D, GPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - fgp_gpu = FGP_TV(pars_fgp_tv['input'], - pars_fgp_tv['regularisation_parameter'], - pars_fgp_tv['number_of_iterations'], - pars_fgp_tv['tolerance_constant'], - pars_fgp_tv['methodTV'], - pars_fgp_tv['nonneg'], - pars_fgp_tv['printingOut'],'gpu') - rms_fgp = rmse(Im, fgp_gpu) - # now compare obtained rms 
with the expected value - self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) - """ - if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: - raise TypeError('FGP-TV (2D, GPU) test FAILED') - else: - print ("test PASSED") - """ - self.assertTrue(res) -if __name__ == '__main__': - unittest.main() \ No newline at end of file -- cgit v1.2.3 From 22f6e22cbe6db04c6bbe8d259ce761e3748d7102 Mon Sep 17 00:00:00 2001 From: algol Date: Thu, 12 Apr 2018 11:56:54 +0100 Subject: dTV some bugs in cython --- Core/regularisers_CPU/FGP_dTV_core.c | 32 ++--- Core/regularisers_CPU/FGP_dTV_core.h | 16 +-- Core/regularisers_GPU/dTV_FGP_GPU_core.cu | 90 ++++++------- Wrappers/Python/ccpi/filters/regularisers.py | 4 +- Wrappers/Python/conda-recipe/run_test.py | 149 --------------------- Wrappers/Python/conda-recipe/run_test.py.in | 148 ++++++++++++++++++++ Wrappers/Python/conda-recipe/testLena.npy | Bin 0 -> 1048656 bytes Wrappers/Python/demos/demo_cpu_regularisers.py | 9 +- .../Python/demos/demo_cpu_vs_gpu_regularisers.py | 2 + Wrappers/Python/demos/demo_gpu_regularisers.py | 8 +- Wrappers/Python/src/cpu_regularisers.pyx | 2 +- Wrappers/Python/src/gpu_regularisers.pyx | 7 +- 12 files changed, 236 insertions(+), 231 deletions(-) delete mode 100644 Wrappers/Python/conda-recipe/run_test.py create mode 100644 Wrappers/Python/conda-recipe/run_test.py.in create mode 100644 Wrappers/Python/conda-recipe/testLena.npy diff --git a/Core/regularisers_CPU/FGP_dTV_core.c b/Core/regularisers_CPU/FGP_dTV_core.c index b182d46..f6b4f79 100644 --- a/Core/regularisers_CPU/FGP_dTV_core.c +++ b/Core/regularisers_CPU/FGP_dTV_core.c @@ -75,20 +75,20 @@ float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambd ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, dimX, dimY); /* computing the gradient of the objective function */ - Obj_func2D(Input, Output, R1, R2, lambdaPar, dimX, dimY); + Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, dimX, dimY); /* apply nonnegativity */ if (nonneg == 1) 
for(j=0; j>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize); + GradNorm_func2D_kernel<<>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -525,41 +525,41 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f for (i = 0; i < iter; i++) { /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/ - ProjectVect_func2D<<>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize); + ProjectVect_func2D_kernel<<>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* computing the gradient of the objective function */ - Obj_func2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); + Obj_dfunc2D_kernel<<>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (nonneg != 0) { - nonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); + dTVnonneg2D_kernel<<>>(d_update, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } /*Taking a step towards minus of the gradient*/ - Grad_func2D_kernel<<>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip); + Grad_dfunc2D_kernel<<>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* projection step */ - if (methodTV == 0) Proj_func2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ - else Proj_func2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ + if (methodTV == 0) Proj_dfunc2D_iso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/ + else Proj_dfunc2D_aniso_kernel<<>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/ checkCudaErrors( 
cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; multip2 = ((tk-1.0f)/tkp1); - Rupd_func2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); + Rupd_dfunc2D_kernel<<>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (epsil != 0.0f) { /* calculate norm - stopping rules using the Thrust library */ - ResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); + dTVResidCalc2D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -572,16 +572,16 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f if (re < epsil) count++; if (count > 4) break; - copy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); + dTVcopy_kernel2D<<>>(d_update, d_update_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } - copy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); + dTVcopy_kernel2D<<>>(P1, P1_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); + dTVcopy_kernel2D<<>>(P2, P2_prev, dimX, dimY, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -651,7 +651,7 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f /********************** Run CUDA 3D kernel here ********************/ multip = (1.0f/(26.0f*lambdaPar)); /* calculate gradient vectors for the reference */ - GradNorm_func3D<<>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize); + GradNorm_func3D_kernel<<>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize); 
checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -659,41 +659,41 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f for (i = 0; i < iter; i++) { /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/ - ProjectVect_func3D<<>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize); + ProjectVect_func3D_kernel<<>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* computing the gradient of the objective function */ - Obj_func3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); + Obj_dfunc3D_kernel<<>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (nonneg != 0) { - nonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); + dTVnonneg3D_kernel<<>>(d_update, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } /*Taking a step towards minus of the gradient*/ - Grad_func3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip); + Grad_dfunc3D_kernel<<>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); /* projection step */ - if (methodTV == 0) Proj_func3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ - else Proj_func3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */ + if (methodTV == 0) Proj_dfunc3D_iso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */ + else Proj_dfunc3D_aniso_kernel<<>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* 
anisotropic kernel */ checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; multip2 = ((tk-1.0f)/tkp1); - Rupd_func3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); + Rupd_dfunc3D_kernel<<>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); if (epsil != 0.0f) { /* calculate norm - stopping rules using the Thrust library */ - ResidCalc3D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize); + dTVResidCalc3D_kernel<<>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); @@ -706,20 +706,20 @@ extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, f if (re < epsil) count++; if (count > 4) break; - copy_kernel3D<<>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } - copy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<>>(P1, P1_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<>>(P2, P2_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); - copy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); + dTVcopy_kernel3D<<>>(P3, P3_prev, dimX, dimY, dimZ, ImSize); checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py index c6723fa..376cc9c 100644 
--- a/Wrappers/Python/ccpi/filters/regularisers.py +++ b/Wrappers/Python/ccpi/filters/regularisers.py @@ -2,8 +2,8 @@ script which assigns a proper device core function based on a flag ('cpu' or 'gpu') """ -from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU dTV_FGP_CPU -from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU dTV_FGP_GPU +from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU +from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU def ROF_TV(inputData, regularisation_parameter, iterations, time_marching_parameter,device='cpu'): diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py deleted file mode 100644 index 04bbd40..0000000 --- a/Wrappers/Python/conda-recipe/run_test.py +++ /dev/null @@ -1,149 +0,0 @@ -import unittest -import numpy as np -import os -from ccpi.filters.regularisers import ROF_TV, FGP_TV -import matplotlib.pyplot as plt - -def rmse(im1, im2): - rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size)) - return rmse - -class TestRegularisers(unittest.TestCase): - - def setUp(self): - pass - - def test_cpu_regularisers(self): - filename = os.path.join(".." , ".." , ".." 
, "data" ,"lena_gray_512.tif") - - # read noiseless image - Im = plt.imread(filename) - Im = np.asarray(Im, dtype='float32') - - Im = Im/255 - tolerance = 1e-05 - rms_rof_exp = 0.006812507 #expected value for ROF model - rms_fgp_exp = 0.019152347 #expected value for FGP model - - # set parameters for ROF-TV - pars_rof_tv = {'algorithm': ROF_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04,\ - 'number_of_iterations': 50,\ - 'time_marching_parameter': 0.0025 - } - # set parameters for FGP-TV - pars_fgp_tv = {'algorithm' : FGP_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04, \ - 'number_of_iterations' :50 ,\ - 'tolerance_constant':1e-08,\ - 'methodTV': 0 ,\ - 'nonneg': 0 ,\ - 'printingOut': 0 - } - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing ROF-TV (2D, CPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - res = True - rof_cpu = ROF_TV(pars_rof_tv['input'], - pars_rof_tv['regularisation_parameter'], - pars_rof_tv['number_of_iterations'], - pars_rof_tv['time_marching_parameter'],'cpu') - rms_rof = rmse(Im, rof_cpu) - # now compare obtained rms with the expected value - self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) - """ - if abs(rms_rof-self.rms_rof_exp) > self.tolerance: - raise TypeError('ROF-TV (2D, CPU) test FAILED') - else: - print ("test PASSED") - """ - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing FGP-TV (2D, CPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - fgp_cpu = FGP_TV(pars_fgp_tv['input'], - pars_fgp_tv['regularisation_parameter'], - pars_fgp_tv['number_of_iterations'], - pars_fgp_tv['tolerance_constant'], - pars_fgp_tv['methodTV'], - pars_fgp_tv['nonneg'], - pars_fgp_tv['printingOut'],'cpu') - rms_fgp = rmse(Im, fgp_cpu) - # now compare obtained rms with the expected value - self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) - """ - if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: - raise TypeError('FGP-TV 
(2D, CPU) test FAILED') - else: - print ("test PASSED") - """ - self.assertTrue(res) - def test_gpu_regularisers(self): - filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif") - - # read noiseless image - Im = plt.imread(filename) - Im = np.asarray(Im, dtype='float32') - - Im = Im/255 - tolerance = 1e-05 - rms_rof_exp = 0.006812507 #expected value for ROF model - rms_fgp_exp = 0.019152347 #expected value for FGP model - - # set parameters for ROF-TV - pars_rof_tv = {'algorithm': ROF_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04,\ - 'number_of_iterations': 50,\ - 'time_marching_parameter': 0.0025 - } - # set parameters for FGP-TV - pars_fgp_tv = {'algorithm' : FGP_TV, \ - 'input' : Im,\ - 'regularisation_parameter':0.04, \ - 'number_of_iterations' :50 ,\ - 'tolerance_constant':1e-08,\ - 'methodTV': 0 ,\ - 'nonneg': 0 ,\ - 'printingOut': 0 - } - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing ROF-TV (2D, GPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - res = True - rof_gpu = ROF_TV(pars_rof_tv['input'], - pars_rof_tv['regularisation_parameter'], - pars_rof_tv['number_of_iterations'], - pars_rof_tv['time_marching_parameter'],'gpu') - rms_rof = rmse(Im, rof_gpu) - # now compare obtained rms with the expected value - self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) - """ - if abs(rms_rof-self.rms_rof_exp) > self.tolerance: - raise TypeError('ROF-TV (2D, GPU) test FAILED') - else: - print ("test PASSED") - """ - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - print ("_________testing FGP-TV (2D, GPU)__________") - print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") - fgp_gpu = FGP_TV(pars_fgp_tv['input'], - pars_fgp_tv['regularisation_parameter'], - pars_fgp_tv['number_of_iterations'], - pars_fgp_tv['tolerance_constant'], - pars_fgp_tv['methodTV'], - pars_fgp_tv['nonneg'], - pars_fgp_tv['printingOut'],'gpu') - rms_fgp = rmse(Im, fgp_gpu) - # now compare obtained rms 
with the expected value - self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) - """ - if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: - raise TypeError('FGP-TV (2D, GPU) test FAILED') - else: - print ("test PASSED") - """ - self.assertTrue(res) -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/Wrappers/Python/conda-recipe/run_test.py.in b/Wrappers/Python/conda-recipe/run_test.py.in new file mode 100644 index 0000000..9a6f4de --- /dev/null +++ b/Wrappers/Python/conda-recipe/run_test.py.in @@ -0,0 +1,148 @@ +import unittest +import numpy as np +from ccpi.filters.regularisers import ROF_TV, FGP_TV + +def rmse(im1, im2): + rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size)) + return rmse + +class TestRegularisers(unittest.TestCase): + + def setUp(self): + pass + + def test_cpu_regularisers(self): + #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy") + + Im = np.load('testLena.npy'); + """ + # read noiseless image + Im = plt.imread(filename) + Im = np.asarray(Im, dtype='float32') + + Im = Im/255 + """ + tolerance = 1e-05 + rms_rof_exp = 0.006812507 #expected value for ROF model + rms_fgp_exp = 0.019152347 #expected value for FGP model + + # set parameters for ROF-TV + pars_rof_tv = {'algorithm': ROF_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04,\ + 'number_of_iterations': 50,\ + 'time_marching_parameter': 0.0025 + } + # set parameters for FGP-TV + pars_fgp_tv = {'algorithm' : FGP_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :50 ,\ + 'tolerance_constant':1e-08,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing ROF-TV (2D, CPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + res = True + rof_cpu = ROF_TV(pars_rof_tv['input'], + pars_rof_tv['regularisation_parameter'], + pars_rof_tv['number_of_iterations'], + 
pars_rof_tv['time_marching_parameter'],'cpu') + rms_rof = rmse(Im, rof_cpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) + """ + if abs(rms_rof-self.rms_rof_exp) > self.tolerance: + raise TypeError('ROF-TV (2D, CPU) test FAILED') + else: + print ("test PASSED") + """ + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing FGP-TV (2D, CPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + fgp_cpu = FGP_TV(pars_fgp_tv['input'], + pars_fgp_tv['regularisation_parameter'], + pars_fgp_tv['number_of_iterations'], + pars_fgp_tv['tolerance_constant'], + pars_fgp_tv['methodTV'], + pars_fgp_tv['nonneg'], + pars_fgp_tv['printingOut'],'cpu') + rms_fgp = rmse(Im, fgp_cpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) + """ + if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: + raise TypeError('FGP-TV (2D, CPU) test FAILED') + else: + print ("test PASSED") + """ + self.assertTrue(res) + def test_gpu_regularisers(self): + #filename = os.path.join(".." , ".." , ".." 
, "data" ,"testLena.npy") + + Im = np.load('testLena.npy'); + + #Im = Im/255 + tolerance = 1e-05 + rms_rof_exp = 0.006812507 #expected value for ROF model + rms_fgp_exp = 0.019152347 #expected value for FGP model + + # set parameters for ROF-TV + pars_rof_tv = {'algorithm': ROF_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04,\ + 'number_of_iterations': 50,\ + 'time_marching_parameter': 0.0025 + } + # set parameters for FGP-TV + pars_fgp_tv = {'algorithm' : FGP_TV, \ + 'input' : Im,\ + 'regularisation_parameter':0.04, \ + 'number_of_iterations' :50 ,\ + 'tolerance_constant':1e-08,\ + 'methodTV': 0 ,\ + 'nonneg': 0 ,\ + 'printingOut': 0 + } + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing ROF-TV (2D, GPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + res = True + rof_gpu = ROF_TV(pars_rof_tv['input'], + pars_rof_tv['regularisation_parameter'], + pars_rof_tv['number_of_iterations'], + pars_rof_tv['time_marching_parameter'],'gpu') + rms_rof = rmse(Im, rof_gpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_rof-rms_rof_exp) , tolerance) + """ + if abs(rms_rof-self.rms_rof_exp) > self.tolerance: + raise TypeError('ROF-TV (2D, GPU) test FAILED') + else: + print ("test PASSED") + """ + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + print ("_________testing FGP-TV (2D, GPU)__________") + print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + fgp_gpu = FGP_TV(pars_fgp_tv['input'], + pars_fgp_tv['regularisation_parameter'], + pars_fgp_tv['number_of_iterations'], + pars_fgp_tv['tolerance_constant'], + pars_fgp_tv['methodTV'], + pars_fgp_tv['nonneg'], + pars_fgp_tv['printingOut'],'gpu') + rms_fgp = rmse(Im, fgp_gpu) + # now compare obtained rms with the expected value + self.assertLess(abs(rms_fgp-rms_fgp_exp) , tolerance) + """ + if abs(rms_fgp-self.rms_fgp_exp) > self.tolerance: + raise TypeError('FGP-TV (2D, GPU) test FAILED') + else: + print ("test PASSED") + """ + 
self.assertTrue(res) +if __name__ == '__main__': + unittest.main() diff --git a/Wrappers/Python/conda-recipe/testLena.npy b/Wrappers/Python/conda-recipe/testLena.npy new file mode 100644 index 0000000..14bc0e3 Binary files /dev/null and b/Wrappers/Python/conda-recipe/testLena.npy differ diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py index fd3050c..00beb0b 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers.py @@ -22,6 +22,8 @@ def printParametersToString(pars): txt += "{0} = {1}".format(key, value.__name__) elif key == 'input': txt += "{0} = {1}".format(key, np.shape(value)) + elif key == 'refdata': + txt += "{0} = {1}".format(key, np.shape(value)) else: txt += "{0} = {1}".format(key, value) txt += '\n' @@ -196,7 +198,7 @@ plt.title('{}'.format('CPU results')) # Uncomment to test 3D regularisation performance #%% - +""" N = 512 slices = 20 @@ -318,8 +320,8 @@ a.set_title('Noisy Image') imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") # set parameters -pars = {'algorithm' : FGP_dTV, \ - 'input' : noisyVol,\ +pars = {'algorithm' : FGP_dTV,\ + 'input' : noisyVol,\ 'refdata' : noisyRef,\ 'regularisation_parameter':0.04, \ 'number_of_iterations' :300 ,\ @@ -358,4 +360,5 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV')) +""" #%% diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index aa1f865..310cf75 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -22,6 +22,8 @@ def printParametersToString(pars): txt += "{0} = {1}".format(key, value.__name__) elif key == 'input': txt += "{0} = {1}".format(key, np.shape(value)) + elif key == 
'refdata': + txt += "{0} = {1}".format(key, np.shape(value)) else: txt += "{0} = {1}".format(key, value) txt += '\n' diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py index 4759cc3..24a3c88 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers.py @@ -22,6 +22,8 @@ def printParametersToString(pars): txt += "{0} = {1}".format(key, value.__name__) elif key == 'input': txt += "{0} = {1}".format(key, np.shape(value)) + elif key == 'refdata': + txt += "{0} = {1}".format(key, np.shape(value)) else: txt += "{0} = {1}".format(key, value) txt += '\n' @@ -192,7 +194,7 @@ plt.title('{}'.format('GPU results')) # Uncomment to test 3D regularisation performance #%% - +""" N = 512 slices = 20 @@ -314,7 +316,7 @@ imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray") # set parameters pars = {'algorithm' : FGP_dTV, \ - 'input' : noisyVol,\ + 'input' : noisyVol,\ 'refdata' : noisyRef,\ 'regularisation_parameter':0.04, \ 'number_of_iterations' :300 ,\ @@ -352,5 +354,5 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, verticalalignment='top', bbox=props) imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray") plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV')) - +""" #%% diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx index 8f9185a..1661375 100644 --- a/Wrappers/Python/src/cpu_regularisers.pyx +++ b/Wrappers/Python/src/cpu_regularisers.pyx @@ -156,8 +156,8 @@ def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter, iterationsNumb, tolerance_param, - methodTV, eta_const, + methodTV, nonneg, printM, dims[0], dims[1], 1) diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 4a14f69..18efdcd 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ 
b/Wrappers/Python/src/gpu_regularisers.pyx @@ -20,7 +20,7 @@ cimport numpy as np cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z); cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z); -cdef extern void dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); +cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); # Total-variation Rudin-Osher-Fatemi (ROF) def TV_ROF_GPU(inputData, @@ -187,8 +187,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, np.zeros([dims[0],dims[1],dims[2]], dtype='float32') # Running CUDA code here - TV_FGP_GPU_main( - &inputData[0,0,0], &outputData[0,0,0], + TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter , iterations, tolerance_param, @@ -204,7 +203,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, #****************************************************************# #******** Directional TV Fast-Gradient-Projection (FGP)*********# def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, - np.ndarray[np.float32_t, ndim=3, mode="c"] refdata, + np.ndarray[np.float32_t, ndim=2, mode="c"] refdata, float regularisation_parameter, int iterations, float tolerance_param, -- cgit v1.2.3