From bfe917daf4a5d6ce17f1b5d2497a0f7a345f88a5 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Wed, 17 Apr 2019 12:03:27 +0100 Subject: var reg for GPU, ROF TV model --- src/Core/regularisers_CPU/ROF_TV_core.c | 5 +- src/Core/regularisers_CPU/ROF_TV_core.h | 12 ++--- src/Core/regularisers_GPU/TV_ROF_GPU_core.cu | 81 ++++++++++++++-------------- src/Core/regularisers_GPU/TV_ROF_GPU_core.h | 4 +- src/Python/src/cpu_regularisers.pyx | 8 +-- src/Python/src/gpu_regularisers.pyx | 62 +++++++++++++++------ 6 files changed, 100 insertions(+), 72 deletions(-) (limited to 'src') diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c index 955cabc..6951268 100644 --- a/src/Core/regularisers_CPU/ROF_TV_core.c +++ b/src/Core/regularisers_CPU/ROF_TV_core.c @@ -30,17 +30,16 @@ int sign(float x) { /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) - * * * Input Parameters: * 1. Noisy image/volume [REQUIRED] - * 2. lambda - regularization parameter [REQUIRED] + * 2. lambda - regularization parameter (a constant or the same size as input (1)) * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] * 5. eplsilon: tolerance constant * * Output: - * [1] Regularized image/volume + * [1] Regularised image/volume * [2] Information vector which contains [iteration no., reached tolerance] * * This function is based on the paper by diff --git a/src/Core/regularisers_CPU/ROF_TV_core.h b/src/Core/regularisers_CPU/ROF_TV_core.h index 8b5e2e4..7e29ddb 100644 --- a/src/Core/regularisers_CPU/ROF_TV_core.h +++ b/src/Core/regularisers_CPU/ROF_TV_core.h @@ -27,29 +27,25 @@ limitations under the License. /* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case) * - * * Input Parameters: * 1. Noisy image/volume [REQUIRED] - * 2. lambda - regularization parameter [REQUIRED] + * 2. lambda - regularization parameter (a constant or the same size as input (1)) * 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] * 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] - * 5. eplsilon: tolerance constant + * 5. eplsilon: tolerance constant * * Output: - * [1] Regularized image/volume + * [1] Regularised image/volume * [2] Information vector which contains [iteration no., reached tolerance] * * This function is based on the paper by * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" - * - * D. Kazantsev, 2016-18 */ - + #ifdef __cplusplus extern "C" { #endif CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ); - CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float *lambda, int lambda_is_arr, float tau, long dimX, long dimY, long dimZ); CCPI_EXPORT float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ); CCPI_EXPORT float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ); diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu index 193cf53..c6c6e88 100755 --- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu +++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu @@ -27,17 +27,16 @@ limitations under the License. * * Input Parameters: * 1. Noisy image/volume [REQUIRED] -* 2. lambda - regularization parameter [REQUIRED] -* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED] -* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] +* 2. lambda - regularization parameter (a constant or the same size as input (1)) +* 3. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED] +* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED] * 5. eplsilon: tolerance constant - - * Output: - * [1] Filtered/regularized image/volume - * [2] Information vector which contains [iteration no., reached tolerance] - - - * This function is based on the paper by +* +* Output: +* [1] Regularised image/volume +* [2] Information vector which contains [iteration no., reached tolerance] +* +* This function is based on the paper by * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms" */ @@ -121,27 +120,25 @@ __host__ __device__ int sign (float x) } } - __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M) + __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float *lambdaPar_d, int lambda_is_arr, float tau, int N, int M) { int i2, j2; - float dv1,dv2; + float dv1,dv2,lambda_val; int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; + int j = blockDim.y * blockIdx.y + threadIdx.y; - int index = i + N*j; + int index = i + N*j; + lambda_val = *(lambdaPar_d + index* lambda_is_arr); if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) { - /* boundary conditions (Neumann reflections) */ i2 = i - 1; if (i2 < 0) i2 = i+1; j2 = j - 1; if (j2 < 0) j2 = j+1; - /* divergence components */ dv1 = D1[index] - D1[j2*N + i]; dv2 = D2[index] - D2[j*N + i2]; - Update[index] += tau*(lambda*(dv1 + dv2) - (Update[index] - Input[index])); - + Update[index] += tau*(lambda_val*(dv1 + dv2) - (Update[index] - Input[index])); } } /*********************3D case****************************/ @@ -265,19 +262,20 @@ __host__ __device__ int sign (float x) } } - __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ) + __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float *lambda, int lambda_is_arr, float tau, int dimX, int dimY, int dimZ) { - float dv1, dv2, dv3; + float dv1, dv2, dv3, lambda_val; int i1,i2,k1,j1,j2,k2; int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; + int j = blockDim.y * blockIdx.y + threadIdx.y; + int k = blockDim.z * blockIdx.z + threadIdx.z; - int index = (dimX*dimY)*k + j*dimX+i; + int index = (dimX*dimY)*k + j*dimX+i; + lambda_val = *(lambda + index* lambda_is_arr); - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { + if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - /* symmetric boundary conditions (Neuman) */ + /* symmetric boundary conditions (Neuman) */ i1 = i + 1; if (i1 >= dimX) i1 = i-1; i2 = i - 1; if (i2 < 0) i2 = i+1; j1 = j + 1; if (j1 >= dimY) j1 = j-1; @@ -290,7 +288,7 @@ __host__ __device__ int sign (float x) dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2]; dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i]; - Update[index] += tau*(lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); + Update[index] += tau*(lambda_val*(dv1 + dv2 + dv3) - (Update[index] - Input[index])); } } @@ -348,7 +346,7 @@ __global__ void ROFResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu ///////////////////////////////////////////////// ///////////////// HOST FUNCTION ///////////////// -extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z) +extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z) { int deviceCount = -1; // number of devices cudaGetDeviceCount(&deviceCount); @@ -360,10 +358,10 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f re = 0.0f; int ImSize, count, n; count = 0; n = 0; - float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL; + float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL, *lambdaPar_d=NULL; if (Z == 0) Z = 1; - ImSize = N*M*Z; + ImSize = N*M*Z; CHECK(cudaMalloc((void**)&d_input,ImSize*sizeof(float))); CHECK(cudaMalloc((void**)&d_update,ImSize*sizeof(float))); CHECK(cudaMalloc((void**)&d_D1,ImSize*sizeof(float))); @@ -373,6 +371,16 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f CHECK(cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); CHECK(cudaMemcpy(d_update,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + /*dealing with spatially variant reglariser */ + if (lambda_is_arr == 1) { + CHECK(cudaMalloc((void**)&lambdaPar_d,ImSize*sizeof(float))); + CHECK(cudaMemcpy(lambdaPar_d,lambdaPar,ImSize*sizeof(float),cudaMemcpyHostToDevice)); + } + else { + CHECK(cudaMalloc((void**)&lambdaPar_d,1*sizeof(float))); + CHECK(cudaMemcpy(lambdaPar_d,lambdaPar,1*sizeof(float),cudaMemcpyHostToDevice)); + } + if (Z == 1) { // TV - 2D case dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D); @@ -391,7 +399,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f D2_func2D<<>>(d_update, d_D2, N, M); CHECK(cudaDeviceSynchronize()); /*running main kernel*/ - TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M); + TV_kernel2D<<>>(d_D1, d_D2, d_update, d_input, lambdaPar_d, lambda_is_arr, tau, N, M); CHECK(cudaDeviceSynchronize()); if ((epsil != 0.0f) && (n % 5 == 0)) { @@ -417,7 +425,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f } } else { - // TV - 3D case + // TV - 3D case dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); @@ -431,7 +439,6 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f checkCudaErrors( cudaDeviceSynchronize() ); checkCudaErrors(cudaPeekAtLastError() ); } - /* calculate differences */ D1_func3D<<>>(d_update, d_D1, N, M, Z); CHECK(cudaDeviceSynchronize()); @@ -440,7 +447,7 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f D3_func3D<<>>(d_update, d_D3, N, M, Z); CHECK(cudaDeviceSynchronize()); /*running main kernel*/ - TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z); + TV_kernel3D<<>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar_d, lambda_is_arr, tau, N, M, Z); CHECK(cudaDeviceSynchronize()); if ((epsil != 0.0f) && (n % 5 == 0)) { @@ -461,12 +468,8 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f re = (reduction/reduction2); if (re < epsil) count++; if (count > 3) break; - - } - - + } } - CHECK(cudaFree(d_D3)); } CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); @@ -475,9 +478,9 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, f CHECK(cudaFree(d_update)); CHECK(cudaFree(d_D1)); CHECK(cudaFree(d_D2)); + CHECK(cudaFree(lambdaPar_d)); infovector[0] = (float)(n); /*iterations number (if stopped earlier based on tolerance)*/ infovector[1] = re; /* reached tolerance */ - return 0; } diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h index 0a75124..30514ad 100755 --- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h +++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h @@ -3,6 +3,6 @@ #include "CCPiDefines.h" #include -extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z); +extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z); -#endif +#endif diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx index a2c4c32..9855eca 100644 --- a/src/Python/src/cpu_regularisers.pyx +++ b/src/Python/src/cpu_regularisers.pyx @@ -18,7 +18,7 @@ import cython import numpy as np cimport numpy as np -cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int arrayscal, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ); +cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ); cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ); cdef extern float SB_TV_CPU_main(float *Input, float *Output, float *infovector, float mu, int iter, float epsil, int methodTV, int dimX, int dimY, int dimZ); cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ); @@ -721,20 +721,20 @@ def MASK_CORR_CPU_2D(np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData, np.ndarray[np.uint8_t, ndim=1, mode="c"] select_classes, int total_classesNum, int CorrectionWindow): - + cdef long dims[2] dims[0] = maskData.shape[0] dims[1] = maskData.shape[1] select_classes_length = select_classes.shape[0] - + cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] mask_upd = \ np.zeros([dims[0],dims[1]], dtype='uint8') cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] corr_regions = \ np.zeros([dims[0],dims[1]], dtype='uint8') # Run the function to process given MASK - Mask_merge_main(&maskData[0,0], &mask_upd[0,0], &corr_regions[0,0], &select_classes[0], select_classes_length, + Mask_merge_main(&maskData[0,0], &mask_upd[0,0], &corr_regions[0,0], &select_classes[0], select_classes_length, total_classesNum, CorrectionWindow, dims[1], dims[0], 1) return (mask_upd,corr_regions) diff --git a/src/Python/src/gpu_regularisers.pyx b/src/Python/src/gpu_regularisers.pyx index 84ee981..8cd8c93 100644 --- a/src/Python/src/gpu_regularisers.pyx +++ b/src/Python/src/gpu_regularisers.pyx @@ -20,7 +20,7 @@ cimport numpy as np CUDAErrorMessage = 'CUDA error' -cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z); +cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float *lambdaPar, int lambda_is_arr, int iter, float tau, float epsil, int N, int M, int Z); cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int N, int M, int Z); cdef extern int TV_SB_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int N, int M, int Z); cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float *infovector, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, float epsil, int N, int M, int Z); @@ -177,7 +177,7 @@ def Diff4th_GPU(inputData, #********************** Total-variation ROF *********************# #****************************************************************# def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, - float regularisation_parameter, + regularisation_parameter, int iterations, float time_marching_parameter, float tolerance_param): @@ -185,26 +185,39 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, cdef long dims[2] dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] - + cdef float lambdareg + cdef np.ndarray[np.float32_t, ndim=2, mode="c"] reg cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \ np.zeros([dims[0],dims[1]], dtype='float32') cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \ np.ones([2], dtype='float32') - # Running CUDA code here - if (TV_ROF_GPU_main( - &inputData[0,0], &outputData[0,0], &infovec[0], - regularisation_parameter, + if isinstance (regularisation_parameter, np.ndarray): + reg = regularisation_parameter.copy() + # Running CUDA code here + if (TV_ROF_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0], + ®[0,0], 1, iterations, time_marching_parameter, tolerance_param, dims[1], dims[0], 1)==0): - return (outputData,infovec) + return (outputData,infovec) + else: + raise ValueError(CUDAErrorMessage); else: - raise ValueError(CUDAErrorMessage); + lambdareg = regularisation_parameter + if (TV_ROF_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0], + &lambdareg, 0, + iterations, + time_marching_parameter, + tolerance_param, + dims[1], dims[0], 1)==0): + return (outputData,infovec) + else: + raise ValueError(CUDAErrorMessage); def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, - float regularisation_parameter, + regularisation_parameter, int iterations, float time_marching_parameter, float tolerance_param): @@ -213,23 +226,40 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, dims[0] = inputData.shape[0] dims[1] = inputData.shape[1] dims[2] = inputData.shape[2] - + cdef float lambdareg + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] reg cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \ np.zeros([dims[0],dims[1],dims[2]], dtype='float32') cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \ np.ones([2], dtype='float32') - # Running CUDA code here - if (TV_ROF_GPU_main( + if isinstance (regularisation_parameter, np.ndarray): + reg = regularisation_parameter.copy() + # Running CUDA code here + if (TV_ROF_GPU_main( &inputData[0,0,0], &outputData[0,0,0], &infovec[0], - regularisation_parameter, + ®[0,0,0], 1, iterations, time_marching_parameter, tolerance_param, dims[2], dims[1], dims[0])==0): - return (outputData,infovec) + return (outputData,infovec) + else: + raise ValueError(CUDAErrorMessage); else: - raise ValueError(CUDAErrorMessage); + lambdareg = regularisation_parameter + # Running CUDA code here + if (TV_ROF_GPU_main( + &inputData[0,0,0], &outputData[0,0,0], &infovec[0], + &lambdareg, 0, + iterations, + time_marching_parameter, + tolerance_param, + dims[2], dims[1], dims[0])==0): + return (outputData,infovec) + else: + raise ValueError(CUDAErrorMessage); + #****************************************************************# #********************** Total-variation FGP *********************# #****************************************************************# -- cgit v1.2.3