From cbeb8a1174498751e38a3de8cd6fe55abae20192 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Thu, 3 Aug 2017 00:10:53 +0100 Subject: some cleaning inside C functions, updated mex_compile file and the work on ordered-subset FISTA started --- main_func/FISTA_REC.m | 40 ++++++++++++- main_func/compile_mex.m | 11 ++-- main_func/regularizers_CPU/FGP_TV_core.c | 26 +++++++++ main_func/regularizers_CPU/FGP_TV_core.h | 27 --------- main_func/regularizers_CPU/LLT_model.c | 26 +++++++++ main_func/regularizers_CPU/LLT_model_core.c | 25 ++++++++ main_func/regularizers_CPU/LLT_model_core.h | 25 -------- main_func/regularizers_CPU/PatchBased_Regul.c | 43 +++++++------- main_func/regularizers_CPU/PatchBased_Regul_core.c | 57 ++++++++---------- main_func/regularizers_CPU/PatchBased_Regul_core.h | 33 ----------- main_func/regularizers_CPU/SplitBregman_TV_core.c | 1 - main_func/regularizers_CPU/SplitBregman_TV_core.h | 24 -------- main_func/regularizers_CPU/TGV_PD.c | 68 +++++----------------- main_func/regularizers_CPU/TGV_PD_core.c | 1 - main_func/regularizers_CPU/TGV_PD_core.h | 29 --------- 15 files changed, 180 insertions(+), 256 deletions(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index 2823691..1e93719 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -99,7 +99,7 @@ else clear proj_geomT vol_geomT else % divergen beam geometry - fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry...'); + fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry... will take some time!'); niter = 8; % number of iteration for PM x1 = rand(N,N,SlicesZ); sqweight = sqrt(weights); @@ -216,6 +216,39 @@ if (isfield(params,'initialize')) else X = zeros(N,N,SlicesZ, 'single'); % storage for the solution end +if (isfield(params,'OS')) + % Ordered Subsets reorganisation of data and angles + OS = 1; + subsets = 8; + angles = proj_geom.ProjectionAngles; + binEdges = linspace(min(angles),max(angles),subsets+1); + + % assign values to bins + [binsDiscr,~] = histc(angles, [binEdges(1:end-1) Inf]); + + % rearrange angles into subsets + AnglesReorg = zeros(length(angles),1); + SinoReorg = zeros(Detectors, anglesNumb, SlicesZ, 'single'); + + counterM = 0; + for ii = 1:max(binsDiscr(:)) + counter = 0; + for jj = 1:subsets + curr_index = ii+jj-1 + counter; + if (binsDiscr(jj) >= ii) + counterM = counterM + 1; + AnglesReorg(counterM) = angles(curr_index); + SinoReorg(:,counterM,:) = squeeze(sino(:,curr_index,:)); + end + counter = (counter + binsDiscr(jj)) - 1; + end + end + sino = SinoReorg; + clear SinoReorg; +else + OS = 0; % normal FISTA +end + %----------------Reconstruction part------------------------ Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given) @@ -241,7 +274,7 @@ for i = 1:iterFISTA % ring removal part (Group-Huber fidelity) for kkk = 1:anglesNumb residual(:,kkk,:) = squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x)); - end + end vec = sum(residual,2); if (SlicesZ > 1) vec = squeeze(vec(:,1,:)); @@ -249,7 +282,7 @@ for i = 1:iterFISTA r = r_x - (1./L_const).*vec; else % no ring removal - residual = weights.*(sino_updt - sino); + residual = weights.*(sino_updt - sino); end objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output @@ -305,5 +338,6 @@ end output.Resid_error = Resid_error; output.objective = objective; output.L_const = L_const; +output.sino = sino; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% end diff --git a/main_func/compile_mex.m b/main_func/compile_mex.m index 7bfa8eb..66c05da 100644 --- a/main_func/compile_mex.m +++ b/main_func/compile_mex.m @@ -1,10 +1,11 @@ -% compile mex's once +% compile mex's in Matlab once cd regularizers_CPU/ -mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +mex LLT_model.c LLT_model_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +mex FGP_TV.c FGP_TV_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +mex SplitBregman_TV.c SplitBregman_TV_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +mex PatchBased_Regul.c PatchBased_Regul_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" cd ../../ cd demos diff --git a/main_func/regularizers_CPU/FGP_TV_core.c b/main_func/regularizers_CPU/FGP_TV_core.c index c383a71..a55991c 100644 --- a/main_func/regularizers_CPU/FGP_TV_core.c +++ b/main_func/regularizers_CPU/FGP_TV_core.c @@ -19,6 +19,32 @@ limitations under the License. #include "FGP_TV_core.h" +/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) + * + * Input Parameters: + * 1. Noisy image/volume [REQUIRED] + * 2. lambda - regularization parameter [REQUIRED] + * 3. Number of iterations [OPTIONAL parameter] + * 4. eplsilon: tolerance constant [OPTIONAL parameter] + * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] + * + * Output: + * [1] Filtered/regularized image + * [2] last function value + * + * Example of image denoising: + * figure; + * Im = double(imread('lena_gray_256.tif'))/255; % loading image + * u0 = Im + .05*randn(size(Im)); % adding noise + * u = FGP_TV(single(u0), 0.05, 100, 1e-04); + * + * This function is based on the Matlab's code and paper by + * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" + * + * D. Kazantsev, 2016-17 + * + */ + /* 2D-case related Functions */ /*****************************************************************/ float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY) diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h index 8d6af3e..e5328fb 100644 --- a/main_func/regularizers_CPU/FGP_TV_core.h +++ b/main_func/regularizers_CPU/FGP_TV_core.h @@ -24,33 +24,6 @@ limitations under the License. #include #include "omp.h" -/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) -* -* Input Parameters: -* 1. Noisy image/volume [REQUIRED] -* 2. lambda - regularization parameter [REQUIRED] -* 3. Number of iterations [OPTIONAL parameter] -* 4. eplsilon: tolerance constant [OPTIONAL parameter] -* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] -* -* Output: -* [1] Filtered/regularized image -* [2] last function value -* -* Example of image denoising: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .05*randn(size(Im)); % adding noise -* u = FGP_TV(single(u0), 0.05, 100, 1e-04); -* -* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* This function is based on the Matlab's code and paper by -* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" -* -* D. Kazantsev, 2016-17 -* -*/ - float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); diff --git a/main_func/regularizers_CPU/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c index 6b50d33..47146a5 100644 --- a/main_func/regularizers_CPU/LLT_model.c +++ b/main_func/regularizers_CPU/LLT_model.c @@ -20,6 +20,32 @@ limitations under the License. #include "mex.h" #include "LLT_model_core.h" +/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty +* +* Input Parameters: +* 1. U0 - origanal noise image/volume +* 2. lambda - regularization parameter +* 3. tau - time-step for explicit scheme +* 4. iter - iterations number +* 5. epsil - tolerance constant (to terminate earlier) +* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test) +* +* Output: +* Filtered/regularized image +* +* Example: +* figure; +* Im = double(imread('lena_gray_256.tif'))/255; % loading image +* u0 = Im + .03*randn(size(Im)); % adding noise +* [Den] = LLT_model(single(u0), 10, 0.1, 1); +* +* +* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE +* +* 28.11.16/Harwell +*/ + void mexFunction( int nlhs, mxArray *plhs[], diff --git a/main_func/regularizers_CPU/LLT_model_core.c b/main_func/regularizers_CPU/LLT_model_core.c index 3098269..7a1cdbe 100644 --- a/main_func/regularizers_CPU/LLT_model_core.c +++ b/main_func/regularizers_CPU/LLT_model_core.c @@ -19,6 +19,31 @@ limitations under the License. #include "LLT_model_core.h" +/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty +* +* Input Parameters: +* 1. U0 - origanal noise image/volume +* 2. lambda - regularization parameter +* 3. tau - time-step for explicit scheme +* 4. iter - iterations number +* 5. epsil - tolerance constant (to terminate earlier) +* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test) +* +* Output: +* Filtered/regularized image +* +* Example: +* figure; +* Im = double(imread('lena_gray_256.tif'))/255; % loading image +* u0 = Im + .03*randn(size(Im)); % adding noise +* [Den] = LLT_model(single(u0), 10, 0.1, 1); +* +* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE +* +* 28.11.16/Harwell +*/ + + float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ) { int i, j, i_p, i_m, j_m, j_p; diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h index ef8803d..10f52fe 100644 --- a/main_func/regularizers_CPU/LLT_model_core.h +++ b/main_func/regularizers_CPU/LLT_model_core.h @@ -26,31 +26,6 @@ limitations under the License. #define EPS 0.01 -/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty -* -* Input Parameters: -* 1. U0 - origanal noise image/volume -* 2. lambda - regularization parameter -* 3. tau - time-step for explicit scheme -* 4. iter - iterations number -* 5. epsil - tolerance constant (to terminate earlier) -* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test) -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .03*randn(size(Im)); % adding noise -* [Den] = LLT_model(single(u0), 10, 0.1, 1); -* -* -* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE -* -* 28.11.16/Harwell -*/ /* 2D functions */ float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ); float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau); diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c index 24ee210..59eb3b3 100644 --- a/main_func/regularizers_CPU/PatchBased_Regul.c +++ b/main_func/regularizers_CPU/PatchBased_Regul.c @@ -27,27 +27,23 @@ limitations under the License. * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" * 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" * - * Input Parameters (mandatory): - * 1. Image (2D or 3D) - * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) - * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) - * 4. h - parameter for the PB penalty function - * 5. lambda - regularization parameter + * Input Parameters: + * 1. Image (2D or 3D) [required] + * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional] + * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional] + * 4. h - parameter for the PB penalty function [optional] + * 5. lambda - regularization parameter [optional] * Output: * 1. regularized (denoised) Image (N x N)/volume (N x N x N) * - * Quick 2D denoising example in Matlab: + * 2D denoising example in Matlab: Im = double(imread('lena_gray_256.tif'))/255; % loading image u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise - ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); - * - * Please see more tests in a file: - TestTemporalSmoothing.m - + ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); * * Matlab + C/mex compilers needed - * to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" + * to compile with OMP support: mex PatchBased_Regul.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" * * D. Kazantsev * * 02/07/2014 @@ -70,17 +66,23 @@ void mexFunction( M = dims[1]; Z = dims[2]; - if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");} + if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input is 2D image or 3D volume");} if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); } if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter"); /*Handling inputs*/ - A = (float *) mxGetData(prhs[0]); /* the image to regularize/filter */ - SearchW_real = (int) mxGetScalar(prhs[1]); /* the searching window ratio */ - SimilW = (int) mxGetScalar(prhs[2]); /* the similarity window ratio */ - h = (float) mxGetScalar(prhs[3]); /* parameter for the PB filtering function */ - lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */ + A = (float *) mxGetData(prhs[0]); /* the image/volume to regularize/filter */ + SearchW_real = 3; /*default value*/ + SimilW = 1; /*default value*/ + h = 0.1; + lambda = 0.1; + + if ((nrhs == 2) || (nrhs == 3) || (nrhs == 4) || (nrhs == 5)) SearchW_real = (int) mxGetScalar(prhs[1]); /* the searching window ratio */ + if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5)) SimilW = (int) mxGetScalar(prhs[2]); /* the similarity window ratio */ + if ((nrhs == 4) || (nrhs == 5)) h = (float) mxGetScalar(prhs[3]); /* parameter for the PB filtering function */ + if ((nrhs == 5)) lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */ + if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0"); if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0"); @@ -89,7 +91,6 @@ void mexFunction( /* SearchW_full = 2*SearchW + 1; */ /* the full searching window size */ /* SimilW_full = 2*SimilW + 1; */ /* the full similarity window size */ - padXY = SearchW + 2*SimilW; /* padding sizes */ newsizeX = N + 2*(padXY); /* the X size of the padded array */ @@ -135,4 +136,4 @@ void mexFunction( switchpad_crop = 1; /*cropping*/ pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); } /*end else ndims*/ -} \ No newline at end of file +} diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.c b/main_func/regularizers_CPU/PatchBased_Regul_core.c index 6f0a48d..acfb464 100644 --- a/main_func/regularizers_CPU/PatchBased_Regul_core.c +++ b/main_func/regularizers_CPU/PatchBased_Regul_core.c @@ -19,40 +19,33 @@ limitations under the License. #include "PatchBased_Regul_core.h" -/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). -* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function -* -* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" -* 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" -* -* Input Parameters (mandatory): -* 1. Image (2D or 3D) -* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) -* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) -* 4. h - parameter for the PB penalty function -* 5. lambda - regularization parameter +/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). + * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function + * + * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" + * 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" + * + * Input Parameters: + * 1. Image (2D or 3D) [required] + * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional] + * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional] + * 4. h - parameter for the PB penalty function [optional] + * 5. lambda - regularization parameter [optional] -* Output: -* 1. regularized (denoised) Image (N x N)/volume (N x N x N) -* -* Quick 2D denoising example in Matlab: -Im = double(imread('lena_gray_256.tif'))/255; % loading image -u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise -ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); -* -* Please see more tests in a file: -TestTemporalSmoothing.m - -* -* Matlab + C/mex compilers needed -* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" -* -* D. Kazantsev * -* 02/07/2014 -* Harwell, UK -*/ + * Output: + * 1. regularized (denoised) Image (N x N)/volume (N x N x N) + * + * 2D denoising example in Matlab: + Im = double(imread('lena_gray_256.tif'))/255; % loading image + u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise + ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); + + * D. Kazantsev * + * 02/07/2014 + * Harwell, UK + */ -/*2D version*/ +/*2D version function */ float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda) { int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full; diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.h b/main_func/regularizers_CPU/PatchBased_Regul_core.h index b83cf10..5aa6415 100644 --- a/main_func/regularizers_CPU/PatchBased_Regul_core.h +++ b/main_func/regularizers_CPU/PatchBased_Regul_core.h @@ -26,39 +26,6 @@ limitations under the License. #include #include "omp.h" -/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). -* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function -* -* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" -* 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" -* -* Input Parameters (mandatory): -* 1. Image (2D or 3D) -* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) -* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) -* 4. h - parameter for the PB penalty function -* 5. lambda - regularization parameter - -* Output: -* 1. regularized (denoised) Image (N x N)/volume (N x N x N) -* -* Quick 2D denoising example in Matlab: -Im = double(imread('lena_gray_256.tif'))/255; % loading image -u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise -ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); -* -* Please see more tests in a file: -TestTemporalSmoothing.m - -* -* Matlab + C/mex compilers needed -* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" -* -* D. Kazantsev * -* 02/07/2014 -* Harwell, UK -*/ - float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda); float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda); diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.c b/main_func/regularizers_CPU/SplitBregman_TV_core.c index 26ad5b1..283dd43 100644 --- a/main_func/regularizers_CPU/SplitBregman_TV_core.c +++ b/main_func/regularizers_CPU/SplitBregman_TV_core.c @@ -37,7 +37,6 @@ limitations under the License. * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; * u = SplitBregman_TV(single(u0), 10, 30, 1e-04); * -* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" * References: * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher. * D. Kazantsev, 2016* diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h index ed9112c..a7aaabb 100644 --- a/main_func/regularizers_CPU/SplitBregman_TV_core.h +++ b/main_func/regularizers_CPU/SplitBregman_TV_core.h @@ -23,30 +23,6 @@ limitations under the License. #include #include "omp.h" -/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D) -* -* Input Parameters: -* 1. Noisy image/volume -* 2. lambda - regularization parameter -* 3. Number of iterations [OPTIONAL parameter] -* 4. eplsilon - tolerance constant [OPTIONAL parameter] -* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; -* u = SplitBregman_TV(single(u0), 10, 30, 1e-04); -* -* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* References: -* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher. -* D. Kazantsev, 2016* -*/ - float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu); float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c index 8bce18a..d4bb787 100644 --- a/main_func/regularizers_CPU/TGV_PD.c +++ b/main_func/regularizers_CPU/TGV_PD.c @@ -39,7 +39,7 @@ limitations under the License. * u0 = Im + .03*randn(size(Im)); % adding noise * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; * - * to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" + * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" * References: * K. Bredies "Total Generalized Variation" * @@ -53,7 +53,7 @@ void mexFunction( { int number_of_dims, iter, dimX, dimY, dimZ, ll; const int *dim_array; - float *A, *U, *U_old, *P1, *P2, *P3, *Q1, *Q2, *Q3, *Q4, *Q5, *Q6, *Q7, *Q8, *Q9, *V1, *V1_old, *V2, *V2_old, *V3, *V3_old, lambda, L2, tau, sigma, alpha1, alpha0; + float *A, *U, *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, lambda, L2, tau, sigma, alpha1, alpha0; number_of_dims = mxGetNumberOfDimensions(prhs[0]); dim_array = mxGetDimensions(prhs[0]); @@ -88,40 +88,10 @@ void mexFunction( V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); } - else if (number_of_dims == 3) { - mexErrMsgTxt("The input data should be 2D"); - /*3D case*/ -// dimZ = dim_array[2]; -// U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// -// P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// -// Q1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q4 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q5 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q6 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q7 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q8 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// Q9 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// -// U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// -// V1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// V1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// V2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// V2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// V3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); -// V3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); - } - else {mexErrMsgTxt("The input data should be 2D");} - - - /*printf("%i \n", i);*/ + V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + + + /*printf("%i \n", i);*/ L2 = 12.0; /*Lipshitz constant*/ tau = 1.0/pow(L2,0.5); sigma = 1.0/pow(L2,0.5); @@ -129,7 +99,6 @@ void mexFunction( /*Copy A to U*/ copyIm(A, U, dimX, dimY, dimZ); - if (number_of_dims == 2) { /* Here primal-dual iterations begin for 2D */ for(ll = 0; ll < iter; ll++) { @@ -164,23 +133,12 @@ void mexFunction( /*get new V*/ newU(V1, V1_old, dimX, dimY, dimZ); newU(V2, V2_old, dimX, dimY, dimZ); - } /*end of iterations*/ + } /*end of iterations*/ } - -// /*3D version*/ -// if (number_of_dims == 3) { -// /* Here primal-dual iterations begin for 3D */ -// for(ll = 0; ll < iter; ll++) { -// -// /* Calculate Dual Variable P */ -// DualP_3D(U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma); -// -// /*Projection onto convex set for P*/ -// ProjP_3D(P1, P2, P3, dimX, dimY, dimZ, alpha1); -// -// /* Calculate Dual Variable Q */ -// DualQ_3D(V1, V2, V2, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, dimX, dimY, dimZ, sigma); -// -// } /*end of iterations*/ -// } + else if (number_of_dims == 3) { + mexErrMsgTxt("The input data should be a 2D array"); + /*3D case*/ + } + else {mexErrMsgTxt("The input data should be a 2D array");} + } diff --git a/main_func/regularizers_CPU/TGV_PD_core.c b/main_func/regularizers_CPU/TGV_PD_core.c index 4c0427c..1164b73 100644 --- a/main_func/regularizers_CPU/TGV_PD_core.c +++ b/main_func/regularizers_CPU/TGV_PD_core.c @@ -38,7 +38,6 @@ limitations under the License. * u0 = Im + .03*randn(size(Im)); % adding noise * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; * - * to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" * References: * K. Bredies "Total Generalized Variation" * diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h index e25ae68..04ba95c 100644 --- a/main_func/regularizers_CPU/TGV_PD_core.h +++ b/main_func/regularizers_CPU/TGV_PD_core.h @@ -24,32 +24,6 @@ limitations under the License. #include #include "omp.h" -/* C-OMP implementation of Primal-Dual denoising method for -* Total Generilized Variation (TGV)-L2 model (2D case only) -* -* Input Parameters: -* 1. Noisy image/volume (2D) -* 2. lambda - regularization parameter -* 3. parameter to control first-order term (alpha1) -* 4. parameter to control the second-order term (alpha0) -* 5. Number of CP iterations -* -* Output: -* Filtered/regularized image -* -* Example: -* figure; -* Im = double(imread('lena_gray_256.tif'))/255; % loading image -* u0 = Im + .03*randn(size(Im)); % adding noise -* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; -* -* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" -* References: -* K. Bredies "Total Generalized Variation" -* -* 28.11.16/Harwell -*/ - /* 2D functions */ float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma); float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1); @@ -57,8 +31,5 @@ float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0); float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau); float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau); -/*3D functions*/ -float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma); - float newU(float *U, float *U_old, int dimX, int dimY, int dimZ); float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); -- cgit v1.2.3 From e412e73b172a99776d108d3b4c1f8662a20e9fce Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Thu, 3 Aug 2017 00:26:46 +0100 Subject: 2D or 3D regularization choices added --- main_func/FISTA_REC.m | 114 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 21 deletions(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index 1e93719..43ed0cb 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -3,6 +3,7 @@ function [X, output] = FISTA_REC(params) % <<<< FISTA-based reconstruction algorithm using ASTRA-toolbox >>>> % ___Input___: % params.[] file: +%----------------General Parameters------------------------ % - .proj_geom (geometry of the projector) [required] % - .vol_geom (geometry of the reconstructed object) [required] % - .sino (vectorized in 2D or 3D sinogram) [required] @@ -13,12 +14,19 @@ function [X, output] = FISTA_REC(params) % - .ROI (Region-of-interest, only if X_ideal is given) % - .initialize (a 'warm start' using SIRT method from ASTRA) %----------------Regularization choices------------------------ -% - .Regul_Lambda_FGPTV (FGP-TV regularization parameter) -% - .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter) -% - .Regul_Lambda_TVLLT (Higher order SB-LLT regularization parameter) +% 1 .Regul_Lambda_FGPTV (FGP-TV regularization parameter) +% 2 .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter) +% 3 .Regul_LambdaLLT (Higher order LLT regularization parameter) +% 3.1 .Regul_tauLLT (time step parameter for LLT (HO) term) +% 4 .Regul_LambdaPatchBased (Patch-based nonlocal regularization parameter) +% 4.1 .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)) +% 4.2 .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)) +% 4.3 .Regul_PB_h (PB penalty function threshold) +% 5 .Regul_LambdaTGV (Total Generalized variation regularization parameter) % - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04) % - .Regul_Iterations (iterations for the selected penalty, default 25) -% - .Regul_tauLLT (time step parameter for LLT term) +% - .Regul_Dimension ('2D' or '3D' way to apply regularization, '3D' is the default) +%----------------Ring removal------------------------ % - .Ring_LambdaR_L1 (regularization parameter for L1-ring minimization, if lambdaR_L1 > 0 then switch on ring removal) % - .Ring_Alpha (larger values can accelerate convergence but check stability, default 1) %----------------Visualization parameters------------------------ @@ -150,8 +158,8 @@ if (isfield(params,'Regul_Iterations')) else IterationsRegul = 25; end -if (isfield(params,'Regul_LambdaHO')) - lambdaHO = params.Regul_LambdaHO; +if (isfield(params,'Regul_LambdaLLT')) + lambdaHO = params.Regul_LambdaLLT; else lambdaHO = 0; end @@ -165,6 +173,26 @@ if (isfield(params,'Regul_tauLLT')) else tauHO = 0.0001; end +if (isfield(params,'Regul_LambdaPatchBased')) + lambdaPB = params.Regul_LambdaPatchBased; +else + lambdaPB = 0; +end +if (isfield(params,'Regul_PB_SearchW')) + SearchW = params.Regul_PB_SearchW; +else + SearchW = 3; % default +end +if (isfield(params,'Regul_PB_SimilW')) + SimilW = params.Regul_PB_SimilW; +else + SimilW = 1; % default +end +if (isfield(params,'Regul_PB_h')) + h_PB = params.Regul_PB_h; +else + h_PB = 0.1; % default +end if (isfield(params,'Ring_LambdaR_L1')) lambdaR_L1 = params.Ring_LambdaR_L1; else @@ -175,6 +203,14 @@ if (isfield(params,'Ring_Alpha')) else alpha_ring = 1; end +if (isfield(params,'Regul_Dimension')) + Dimension = params.Regul_Dimension; + if ((strcmp('2D', Dimension) ~= 1) && (strcmp('3D', Dimension) ~= 1)) + Dimension = '3D'; + end +else + Dimension = '3D'; +end if (isfield(params,'show')) show = params.show; else @@ -293,21 +329,57 @@ for i = 1:iterFISTA astra_mex_data3d('delete', sino_id); astra_mex_data3d('delete', id); - if (lambdaFGP_TV > 0) - % FGP-TV regularization - [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso'); - objective(i) = objective(i) + f_val; - end - if (lambdaSB_TV > 0) - % Split Bregman regularization - X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) - end - if (lambdaHO > 0) - % Higher Order (LLT) regularization - X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0); - X = 0.5.*(X + X2); % averaged combination of two solutions - end - + % regularization + if (lambdaFGP_TV > 0) + % FGP-TV regularization + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV, IterationsRegul, tol, 'iso'); + end + else + % 3D regularization + [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso'); + end + objective(i) = objective(i) + f_val; + end + if (lambdaSB_TV > 0) + % Split Bregman regularization + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) + end + else + % 3D regularization + X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) + end + end + if (lambdaHO > 0) + % Higher Order (LLT) regularization + X2 = zeros(N,N,SlicesZ,'single'); + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO, tauHO, iterHO, 3.0e-05, 0); + end + else + % 3D regularization + X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0); + end + X = 0.5.*(X + X2); % averaged combination of two solutions + end + if (lambdaPB > 0) + % Patch-Based regularization (can be slow on CPU) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB); + end + else + X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB); + end + end if (lambdaR_L1 > 0) -- cgit v1.2.3 From d7281b95f70dd3707f82640972a52f4155c2fde5 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Thu, 3 Aug 2017 15:25:50 +0100 Subject: Ordered Subset FISTA added and demos updated in DemoRD2 --- main_func/FISTA_REC.m | 404 +++++++++++++++++++++++++----------- main_func/regularizers_CPU/TGV_PD.c | 6 +- 2 files changed, 291 insertions(+), 119 deletions(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index 43ed0cb..381fa34 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -1,6 +1,6 @@ function [X, output] = FISTA_REC(params) -% <<<< FISTA-based reconstruction algorithm using ASTRA-toolbox >>>> +% <<<< FISTA-based reconstruction routine using ASTRA-toolbox >>>> % ___Input___: % params.[] file: %----------------General Parameters------------------------ @@ -19,9 +19,9 @@ function [X, output] = FISTA_REC(params) % 3 .Regul_LambdaLLT (Higher order LLT regularization parameter) % 3.1 .Regul_tauLLT (time step parameter for LLT (HO) term) % 4 .Regul_LambdaPatchBased (Patch-based nonlocal regularization parameter) -% 4.1 .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)) -% 4.2 .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)) -% 4.3 .Regul_PB_h (PB penalty function threshold) +% 4.1 .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)) +% 4.2 .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)) +% 4.3 .Regul_PB_h (PB penalty function threshold) % 5 .Regul_LambdaTGV (Total Generalized variation regularization parameter) % - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04) % - .Regul_Iterations (iterations for the selected penalty, default 25) @@ -193,6 +193,11 @@ if (isfield(params,'Regul_PB_h')) else h_PB = 0.1; % default end +if (isfield(params,'Regul_LambdaTGV')) + LambdaTGV = params.Regul_LambdaTGV; +else + LambdaTGV = 0.01; +end if (isfield(params,'Ring_LambdaR_L1')) lambdaR_L1 = params.Ring_LambdaR_L1; else @@ -252,20 +257,17 @@ if (isfield(params,'initialize')) else X = zeros(N,N,SlicesZ, 'single'); % storage for the solution end -if (isfield(params,'OS')) +if (isfield(params,'subsets')) % Ordered Subsets reorganisation of data and angles - OS = 1; - subsets = 8; + subsets = params.subsets; % subsets number angles = proj_geom.ProjectionAngles; binEdges = linspace(min(angles),max(angles),subsets+1); % assign values to bins [binsDiscr,~] = histc(angles, [binEdges(1:end-1) Inf]); - % rearrange angles into subsets - AnglesReorg = zeros(length(angles),1); - SinoReorg = zeros(Detectors, anglesNumb, SlicesZ, 'single'); - + % get rearranged subset indices + IndicesReorg = zeros(length(angles),1); counterM = 0; for ii = 1:max(binsDiscr(:)) counter = 0; @@ -273,143 +275,313 @@ if (isfield(params,'OS')) curr_index = ii+jj-1 + counter; if (binsDiscr(jj) >= ii) counterM = counterM + 1; - AnglesReorg(counterM) = angles(curr_index); - SinoReorg(:,counterM,:) = squeeze(sino(:,curr_index,:)); + IndicesReorg(counterM) = curr_index; end counter = (counter + binsDiscr(jj)) - 1; end end - sino = SinoReorg; - clear SinoReorg; -else - OS = 0; % normal FISTA +else + subsets = 0; % Classical FISTA end - %----------------Reconstruction part------------------------ Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given) objective = zeros(iterFISTA,1); % objective function values vector -t = 1; -X_t = X; -r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors -r_x = r; % another ring variable -residual = zeros(size(sino),'single'); - -% Outer FISTA iterations loop -for i = 1:iterFISTA - - X_old = X; - t_old = t; - r_old = r; +if (subsets == 0) + % Classical FISTA + t = 1; + X_t = X; - [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); + r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors + r_x = r; % another ring variable + residual = zeros(size(sino),'single'); - if (lambdaR_L1 > 0) - % ring removal part (Group-Huber fidelity) - for kkk = 1:anglesNumb - residual(:,kkk,:) = squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x)); + % Outer FISTA iterations loop + for i = 1:iterFISTA + + X_old = X; + t_old = t; + r_old = r; + + [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); + + if (lambdaR_L1 > 0) + % the ring removal part (Group-Huber fidelity) + for kkk = 1:anglesNumb + residual(:,kkk,:) = squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x)); + end + vec = sum(residual,2); + if (SlicesZ > 1) + vec = squeeze(vec(:,1,:)); + end + r = r_x - (1./L_const).*vec; + else + % no ring removal + residual = weights.*(sino_updt - sino); end - vec = sum(residual,2); - if (SlicesZ > 1) - vec = squeeze(vec(:,1,:)); + + objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output + + [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom); + + X = X_t - (1/L_const).*x_temp; + astra_mex_data3d('delete', sino_id); + astra_mex_data3d('delete', id); + + % regularization + if (lambdaFGP_TV > 0) + % FGP-TV regularization + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV, IterationsRegul, tol, 'iso'); + end + else + % 3D regularization + [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso'); + end + objective(i) = objective(i) + f_val; end - r = r_x - (1./L_const).*vec; - else - % no ring removal - residual = weights.*(sino_updt - sino); - end - - objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output - - [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom); - - X = X_t - (1/L_const).*x_temp; - astra_mex_data3d('delete', sino_id); - astra_mex_data3d('delete', id); - - % regularization - if (lambdaFGP_TV > 0) - % FGP-TV regularization - if ((strcmp('2D', Dimension) == 1)) - % 2D regularization - for kkk = 1:SlicesZ - [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV, IterationsRegul, tol, 'iso'); + if (lambdaSB_TV > 0) + % Split Bregman regularization + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) + end + else + % 3D regularization + X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) end - else - % 3D regularization - [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso'); end - objective(i) = objective(i) + f_val; - end - if (lambdaSB_TV > 0) - % Split Bregman regularization - if ((strcmp('2D', Dimension) == 1)) - % 2D regularization - for kkk = 1:SlicesZ - X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) + if (lambdaHO > 0) + % Higher Order (LLT) regularization + X2 = zeros(N,N,SlicesZ,'single'); + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO, tauHO, iterHO, 3.0e-05, 0); + end + else + % 3D regularization + X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0); end - else - % 3D regularization - X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol); % (more memory efficent) + X = 0.5.*(X + X2); % averaged combination of two solutions end - end - if (lambdaHO > 0) - % Higher Order (LLT) regularization - X2 = zeros(N,N,SlicesZ,'single'); - if ((strcmp('2D', Dimension) == 1)) - % 2D regularization - for kkk = 1:SlicesZ - X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO, tauHO, iterHO, 3.0e-05, 0); + if (lambdaPB > 0) + % Patch-Based regularization (can be slow on CPU) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB); + end + else + X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB); end - else - % 3D regularization - X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0); end - X = 0.5.*(X + X2); % averaged combination of two solutions - end - if (lambdaPB > 0) - % Patch-Based regularization (can be slow on CPU) - if ((strcmp('2D', Dimension) == 1)) - % 2D regularization - for kkk = 1:SlicesZ - X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB); + if (LambdaTGV > 0) + % Total Generalized variation (currently only 2D) + lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper + lamTGV2 = 0.5; % second-order term + for kkk = 1:SlicesZ + X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul); + end + end + + if (lambdaR_L1 > 0) + r = max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector + end + + t = (1 + sqrt(1 + 4*t^2))/2; % updating t + X_t = X + ((t_old-1)/t).*(X - X_old); % updating X + + if (lambdaR_L1 > 0) + r_x = r + ((t_old-1)/t).*(r - r_old); % updating r + end + + if (show == 1) + figure(10); imshow(X(:,:,slice), [0 maxvalplot]); + if (lambdaR_L1 > 0) + figure(11); plot(r); title('Rings offset vector') end + pause(0.01); + end + if (strcmp(X_ideal, 'none' ) == 0) + Resid_error(i) = RMSE(X(ROI), X_ideal(ROI)); + fprintf('%s %i %s %s %.4f %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i)); else - X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB); + fprintf('%s %i %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i)); end - end - - - if (lambdaR_L1 > 0) - r = max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector end +else + % Ordered Subsets (OS) FISTA reconstruction routine (normally one order of magnitude faster than classical) + t = 1; + X_t = X; + proj_geomSUB = proj_geom; - t = (1 + sqrt(1 + 4*t^2))/2; % updating t - X_t = X + ((t_old-1)/t).*(X - X_old); % updating X - if (lambdaR_L1 > 0) - r_x = r + ((t_old-1)/t).*(r - r_old); % updating r - end + r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors + r_x = r; % another ring variable + residual2 = zeros(size(sino),'single'); - if (show == 1) - figure(10); imshow(X(:,:,slice), [0 maxvalplot]); - if (lambdaR_L1 > 0) - figure(11); plot(r); title('Rings offset vector') + % Outer FISTA iterations loop + for i = 1:iterFISTA + + % With OS approach it becomes trickier to correlate independent subsets, hence additional work is required + % one solution is to work with a full sinogram at times + if ((i >= 3) && (lambdaR_L1 > 0)) + [sino_id2, sino_updt2] = astra_create_sino3d_cuda(X, proj_geom, vol_geom); + astra_mex_data3d('delete', sino_id2); + end + + % subsets loop + counterInd = 1; + for ss = 1:subsets + X_old = X; + t_old = t; + r_old = r; + + numProjSub = binsDiscr(ss); % the number of projections per subset + CurrSubIndeces = IndicesReorg(counterInd:(counterInd + numProjSub - 1)); % extract indeces attached to the subset + proj_geomSUB.ProjectionAngles = angles(CurrSubIndeces); + + if (lambdaR_L1 > 0) + + % the ring removal part (Group-Huber fidelity) + % first 2 iterations do additional work reconstructing whole dataset to ensure + % stablility + if (i < 3) + [sino_id2, sino_updt2] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); + astra_mex_data3d('delete', sino_id2); + else + [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom); + end + + for kkk = 1:anglesNumb + residual2(:,kkk,:) = squeeze(weights(:,kkk,:)).*(squeeze(sino_updt2(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x)); + end + + residual = zeros(Detectors, numProjSub, SlicesZ,'single'); + for kkk = 1:numProjSub + indC = CurrSubIndeces(kkk); + if (i < 3) + residual(:,kkk,:) = squeeze(residual2(:,indC,:)); + else + residual(:,kkk,:) = squeeze(weights(:,indC,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,indC,:)) - alpha_ring.*r_x)); + end + end + vec = sum(residual2,2); + if (SlicesZ > 1) + vec = squeeze(vec(:,1,:)); + end + r = r_x - (1./L_const).*vec; + else + [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom); + % no ring removal + residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:))); + end + + [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geomSUB, vol_geom); + + X = X_t - (1/L_const).*x_temp; + astra_mex_data3d('delete', sino_id); + astra_mex_data3d('delete', id); + + % regularization + if (lambdaFGP_TV > 0) + % FGP-TV regularization + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV/subsets, IterationsRegul, tol, 'iso'); + end + else + % 3D regularization + [X, f_val] = FGP_TV(single(X), lambdaFGP_TV/subsets, IterationsRegul, tol, 'iso'); + end + objective(i) = objective(i) + f_val; + end + if (lambdaSB_TV > 0) + % Split Bregman regularization + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV/subsets, IterationsRegul, tol); % (more memory efficent) + end + else + % 3D regularization + X = SplitBregman_TV(single(X), lambdaSB_TV/subsets, IterationsRegul, tol); % (more memory efficent) + end + end + if (lambdaHO > 0) + % Higher Order (LLT) regularization + X2 = zeros(N,N,SlicesZ,'single'); + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO/subsets, tauHO/subsets, iterHO, 2.0e-05, 0); + end + else + % 3D regularization + X2 = LLT_model(single(X), lambdaHO/subsets, tauHO/subsets, iterHO, 2.0e-05, 0); + end + X = 0.5.*(X + X2); % averaged combination of two solutions + end + if (lambdaPB > 0) + % Patch-Based regularization (can be slow on CPU) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB/subsets); + end + else + X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB/subsets); + end + end + if (LambdaTGV > 0) + % Total Generalized variation (currently only 2D) + lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper + lamTGV2 = 0.5; % second-order term + for kkk = 1:SlicesZ + X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/subsets, lamTGV1, lamTGV2, IterationsRegul); + end + end + + if (lambdaR_L1 > 0) + r = max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector + end + + t = (1 + sqrt(1 + 4*t^2))/2; % updating t + X_t = X + ((t_old-1)/t).*(X - X_old); % updating X + + if (lambdaR_L1 > 0) + r_x = r + ((t_old-1)/t).*(r - r_old); % updating r + end + + counterInd = counterInd + numProjSub; + end + + if (show == 1) + figure(10); imshow(X(:,:,slice), [0 maxvalplot]); + if (lambdaR_L1 > 0) + figure(11); plot(r); title('Rings offset vector') + end + pause(0.01); + end + + if (strcmp(X_ideal, 'none' ) == 0) + Resid_error(i) = RMSE(X(ROI), X_ideal(ROI)); + fprintf('%s %i %s %s %.4f %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i)); + else + fprintf('%s %i %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i)); end - pause(0.01); - end - if (strcmp(X_ideal, 'none' ) == 0) - Resid_error(i) = RMSE(X(ROI), X_ideal(ROI)); - fprintf('%s %i %s %s %.4f %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i)); - else - fprintf('%s %i %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i)); end end + output.Resid_error = Resid_error; output.objective = objective; output.L_const = L_const; -output.sino = sino; -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + end diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c index d4bb787..6593d8e 100644 --- a/main_func/regularizers_CPU/TGV_PD.c +++ b/main_func/regularizers_CPU/TGV_PD.c @@ -37,9 +37,9 @@ limitations under the License. * figure; * Im = double(imread('lena_gray_256.tif'))/255; % loading image * u0 = Im + .03*randn(size(Im)); % adding noise - * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; + * tic; u = TGV_PD(single(u0), 0.02, 1.3, 1, 550); toc; * - * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" + * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" * References: * K. Bredies "Total Generalized Variation" * @@ -92,7 +92,7 @@ void mexFunction( /*printf("%i \n", i);*/ - L2 = 12.0; /*Lipshitz constant*/ + L2 = 12.0f; /*Lipshitz constant*/ tau = 1.0/pow(L2,0.5); sigma = 1.0/pow(L2,0.5); -- cgit v1.2.3 From 69c2afc4670966c7e12420be7afd55ea69dfc8e8 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Thu, 24 Aug 2017 12:46:12 +0100 Subject: a minor big with TGV being constatly swtiched one removed --- main_func/FISTA_REC.m | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index 381fa34..a6e0ae5 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -98,7 +98,7 @@ else for i = 1:niter [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geomT, vol_geomT); s = norm(x1(:)); - x1 = x1/s; + x1 = x1./s; [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geomT, vol_geomT); y = sqweight.*y; astra_mex_data3d('delete', sino_id); @@ -196,7 +196,7 @@ end if (isfield(params,'Regul_LambdaTGV')) LambdaTGV = params.Regul_LambdaTGV; else - LambdaTGV = 0.01; + LambdaTGV = 0; end if (isfield(params,'Ring_LambdaR_L1')) lambdaR_L1 = params.Ring_LambdaR_L1; @@ -369,6 +369,7 @@ if (subsets == 0) X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0); end X = 0.5.*(X + X2); % averaged combination of two solutions + end if (lambdaPB > 0) % Patch-Based regularization (can be slow on CPU) @@ -384,7 +385,7 @@ if (subsets == 0) if (LambdaTGV > 0) % Total Generalized variation (currently only 2D) lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper - lamTGV2 = 0.5; % second-order term + lamTGV2 = 0.8; % second-order term for kkk = 1:SlicesZ X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul); end -- cgit v1.2.3 From ac2d90b52b15127eadbbf0d2f300d9da31f755c7 Mon Sep 17 00:00:00 2001 From: algol Date: Thu, 7 Sep 2017 14:59:36 +0100 Subject: parallel3D updated for big data in Matlab --- main_func/FISTA_REC.m | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index a6e0ae5..8dd569f 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -305,7 +305,17 @@ if (subsets == 0) t_old = t; r_old = r; + % if the geometry is parallel use slice-by-slice projection-backprojection routine + if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) + sino_updt = zeros(size(sino),'single'); + for kkk = 1:SlicesZ + [sino_id, sino_updt(:,kkk,:)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT); + astra_mex_data3d('delete', sino_id); + end + else + % for divergent 3D geometry (for Matlab watch the GPU memory overflow) [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); + end if (lambdaR_L1 > 0) % the ring removal part (Group-Huber fidelity) @@ -324,8 +334,16 @@ if (subsets == 0) objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output + % if the geometry is parallel use slice-by-slice projection-backprojection routine + if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) + x_temp = zeros(size(X),'single'); + for kkk = 1:SlicesZ + [id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,kkk,:)), proj_geomT, vol_geomT); + astra_mex_data3d('delete', id); + end + else [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom); - + end X = X_t - (1/L_const).*x_temp; astra_mex_data3d('delete', sino_id); astra_mex_data3d('delete', id); -- cgit v1.2.3 From 078b9e2db2e25d663a1140cc71ee4d16c36cc161 Mon Sep 17 00:00:00 2001 From: "Jakob Jorgensen, algol@harwell" Date: Thu, 7 Sep 2017 15:09:50 +0100 Subject: bugsf --- main_func/FISTA_REC.m | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index 8dd569f..00e59ab 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -104,7 +104,7 @@ else astra_mex_data3d('delete', sino_id); astra_mex_data3d('delete', id); end - clear proj_geomT vol_geomT + %clear proj_geomT vol_geomT else % divergen beam geometry fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry... will take some time!'); @@ -309,7 +309,7 @@ if (subsets == 0) if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) sino_updt = zeros(size(sino),'single'); for kkk = 1:SlicesZ - [sino_id, sino_updt(:,kkk,:)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT); + [sino_id, sino_updt(:,:,kkk)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT); astra_mex_data3d('delete', sino_id); end else @@ -338,7 +338,7 @@ if (subsets == 0) if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) x_temp = zeros(size(X),'single'); for kkk = 1:SlicesZ - [id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,kkk,:)), proj_geomT, vol_geomT); + [id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,:,kkk)), proj_geomT, vol_geomT); astra_mex_data3d('delete', id); end else -- cgit v1.2.3 From 62ab6cd46c3f1c189328c8d41899db7444c7ac29 Mon Sep 17 00:00:00 2001 From: Daniil Kazantsev Date: Mon, 11 Sep 2017 09:36:13 +0100 Subject: 2 new GPU regularizers, FGP objective fixed, FISTA_REC updated --- main_func/FISTA_REC.m | 138 ++++++++--- main_func/regularizers_CPU/FGP_TV.c | 47 +--- main_func/regularizers_CPU/FGP_TV_core.c | 49 ++++ main_func/regularizers_CPU/FGP_TV_core.h | 2 + .../Diffus_HO/Diff4thHajiaboli_GPU.cpp | 114 +++++++++ .../Diffus_HO/Diff4th_GPU_kernel.cu | 270 +++++++++++++++++++++ .../Diffus_HO/Diff4th_GPU_kernel.h | 6 + main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp | 171 +++++++++++++ .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu | 239 ++++++++++++++++++ .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.h | 6 + 10 files changed, 974 insertions(+), 68 deletions(-) create mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp create mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu create mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h create mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp create mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu create mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index 00e59ab..edccbe1 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -1,6 +1,15 @@ function [X, output] = FISTA_REC(params) % <<<< FISTA-based reconstruction routine using ASTRA-toolbox >>>> +% This code solves regularised PWLS problem using FISTA approach. +% The code contains multiple regularisation penalties as well as it can be +% accelerated by using ordered-subset version. Various projection +% geometries supported. + +% DISCLAIMER +% It is recommended to use ASTRA version 1.8 or later in order to avoid +% crashing due to GPU memory overflow for big datasets + % ___Input___: % params.[] file: %----------------General Parameters------------------------ @@ -18,11 +27,17 @@ function [X, output] = FISTA_REC(params) % 2 .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter) % 3 .Regul_LambdaLLT (Higher order LLT regularization parameter) % 3.1 .Regul_tauLLT (time step parameter for LLT (HO) term) -% 4 .Regul_LambdaPatchBased (Patch-based nonlocal regularization parameter) +% 4 .Regul_LambdaPatchBased_CPU (Patch-based nonlocal regularization parameter) % 4.1 .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)) % 4.2 .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)) % 4.3 .Regul_PB_h (PB penalty function threshold) -% 5 .Regul_LambdaTGV (Total Generalized variation regularization parameter) +% 5 .Regul_LambdaPatchBased_GPU (Patch-based nonlocal regularization parameter) +% 5.1 .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)) +% 5.2 .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)) +% 5.3 .Regul_PB_h (PB penalty function threshold) +% 6 .Regul_LambdaDiffHO (Higher-Order Diffusion regularization parameter) +% 6.1 .Regul_DiffHO_EdgePar (edge-preserving noise related parameter) +% 7 .Regul_LambdaTGV (Total Generalized variation regularization parameter) % - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04) % - .Regul_Iterations (iterations for the selected penalty, default 25) % - .Regul_Dimension ('2D' or '3D' way to apply regularization, '3D' is the default) @@ -173,11 +188,16 @@ if (isfield(params,'Regul_tauLLT')) else tauHO = 0.0001; end -if (isfield(params,'Regul_LambdaPatchBased')) - lambdaPB = params.Regul_LambdaPatchBased; +if (isfield(params,'Regul_LambdaPatchBased_CPU')) + lambdaPB = params.Regul_LambdaPatchBased_CPU; else lambdaPB = 0; end +if (isfield(params,'Regul_LambdaPatchBased_GPU')) + lambdaPB_GPU = params.Regul_LambdaPatchBased_GPU; +else + lambdaPB_GPU = 0; +end if (isfield(params,'Regul_PB_SearchW')) SearchW = params.Regul_PB_SearchW; else @@ -193,6 +213,16 @@ if (isfield(params,'Regul_PB_h')) else h_PB = 0.1; % default end +if (isfield(params,'Regul_LambdaDiffHO')) + LambdaDiff_HO = params.Regul_LambdaDiffHO; +else + LambdaDiff_HO = 0; +end +if (isfield(params,'Regul_DiffHO_EdgePar')) + LambdaDiff_HO_EdgePar = params.Regul_DiffHO_EdgePar; +else + LambdaDiff_HO_EdgePar = 0.01; +end if (isfield(params,'Regul_LambdaTGV')) LambdaTGV = params.Regul_LambdaTGV; else @@ -305,16 +335,16 @@ if (subsets == 0) t_old = t; r_old = r; - % if the geometry is parallel use slice-by-slice projection-backprojection routine + % if the geometry is parallel use slice-by-slice projection-backprojection routine if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) - sino_updt = zeros(size(sino),'single'); - for kkk = 1:SlicesZ - [sino_id, sino_updt(:,:,kkk)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT); - astra_mex_data3d('delete', sino_id); - end - else - % for divergent 3D geometry (for Matlab watch the GPU memory overflow) - [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); + sino_updt = zeros(size(sino),'single'); + for kkk = 1:SlicesZ + [sino_id, sino_updt(:,:,kkk)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT); + astra_mex_data3d('delete', sino_id); + end + else + % for divergent 3D geometry (watch the GPU memory overflow in earlier ASTRA versions < 1.8) + [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); end if (lambdaR_L1 > 0) @@ -332,17 +362,17 @@ if (subsets == 0) residual = weights.*(sino_updt - sino); end - objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output + objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output - % if the geometry is parallel use slice-by-slice projection-backprojection routine + % if the geometry is parallel use slice-by-slice projection-backprojection routine if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) - x_temp = zeros(size(X),'single'); - for kkk = 1:SlicesZ - [id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,:,kkk)), proj_geomT, vol_geomT); - astra_mex_data3d('delete', id); - end + x_temp = zeros(size(X),'single'); + for kkk = 1:SlicesZ + [id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,:,kkk)), proj_geomT, vol_geomT); + astra_mex_data3d('delete', id); + end else - [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom); + [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom); end X = X_t - (1/L_const).*x_temp; astra_mex_data3d('delete', sino_id); @@ -360,7 +390,7 @@ if (subsets == 0) % 3D regularization [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso'); end - objective(i) = objective(i) + f_val; + objective(i) = (objective(i) + f_val)./(Detectors*anglesNumb*SlicesZ); end if (lambdaSB_TV > 0) % Split Bregman regularization @@ -390,7 +420,7 @@ if (subsets == 0) end if (lambdaPB > 0) - % Patch-Based regularization (can be slow on CPU) + % Patch-Based regularization (can be very slow on CPU) if ((strcmp('2D', Dimension) == 1)) % 2D regularization for kkk = 1:SlicesZ @@ -400,13 +430,35 @@ if (subsets == 0) X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB); end end - if (LambdaTGV > 0) - % Total Generalized variation (currently only 2D) - lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper - lamTGV2 = 0.8; % second-order term + if (lambdaPB_GPU > 0) + % Patch-Based regularization (GPU CUDA implementation) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization for kkk = 1:SlicesZ - X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul); + X(:,:,kkk) = NLM_GPU(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB_GPU); end + else + X = NLM_GPU(single(X), SearchW, SimilW, h_PB, lambdaPB_GPU); + end + end + if (LambdaDiff_HO > 0) + % Higher-order diffusion penalty (GPU CUDA implementation) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = Diff4thHajiaboli_GPU(single(X(:,:,kkk)), LambdaDiff_HO_EdgePar, LambdaDiff_HO, IterationsRegul); + end + else + X = Diff4thHajiaboli_GPU(X, LambdaDiff_HO_EdgePar, LambdaDiff_HO, IterationsRegul); + end + end + if (LambdaTGV > 0) + % Total Generalized variation (currently only 2D) + lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper + lamTGV2 = 0.8; % second-order term + for kkk = 1:SlicesZ + X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul); + end end if (lambdaR_L1 > 0) @@ -470,7 +522,7 @@ else % the ring removal part (Group-Huber fidelity) % first 2 iterations do additional work reconstructing whole dataset to ensure - % stablility + % the stablility if (i < 3) [sino_id2, sino_updt2] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom); astra_mex_data3d('delete', sino_id2); @@ -546,7 +598,7 @@ else % 3D regularization X2 = LLT_model(single(X), lambdaHO/subsets, tauHO/subsets, iterHO, 2.0e-05, 0); end - X = 0.5.*(X + X2); % averaged combination of two solutions + X = 0.5.*(X + X2); % the averaged combination of two solutions end if (lambdaPB > 0) % Patch-Based regularization (can be slow on CPU) @@ -559,14 +611,36 @@ else X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB/subsets); end end + if (lambdaPB_GPU > 0) + % Patch-Based regularization (GPU CUDA implementation) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = NLM_GPU(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB_GPU); + end + else + X = NLM_GPU(single(X), SearchW, SimilW, h_PB, lambdaPB_GPU); + end + end + if (LambdaDiff_HO > 0) + % Higher-order diffusion penalty (GPU CUDA implementation) + if ((strcmp('2D', Dimension) == 1)) + % 2D regularization + for kkk = 1:SlicesZ + X(:,:,kkk) = Diff4thHajiaboli_GPU(single(X(:,:,kkk)), LambdaDiff_HO_EdgePar, LambdaDiff_HO, round(IterationsRegul/subsets)); + end + else + X = Diff4thHajiaboli_GPU(X, LambdaDiff_HO_EdgePar, LambdaDiff_HO, round(IterationsRegul/subsets)); + end + end if (LambdaTGV > 0) % Total Generalized variation (currently only 2D) lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper lamTGV2 = 0.5; % second-order term for kkk = 1:SlicesZ - X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/subsets, lamTGV1, lamTGV2, IterationsRegul); + X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/subsets, lamTGV1, lamTGV2, IterationsRegul); end - end + end if (lambdaR_L1 > 0) r = max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector diff --git a/main_func/regularizers_CPU/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c index 5d8cfb9..cfe5b9e 100644 --- a/main_func/regularizers_CPU/FGP_TV.c +++ b/main_func/regularizers_CPU/FGP_TV.c @@ -54,7 +54,7 @@ void mexFunction( { int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV; const int *dim_array; - float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil, funcval; + float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil; number_of_dims = mxGetNumberOfDimensions(prhs[0]); dim_array = mxGetDimensions(prhs[0]); @@ -78,7 +78,6 @@ void mexFunction( mxFree(penalty_type); } /*output function value (last iteration) */ - funcval = 0.0f; plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL); float *funcvalA = (float *) mxGetData(plhs[1]); @@ -117,7 +116,7 @@ void mexFunction( /*updating R and t*/ tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f; - Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); + Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY); /* calculate norm */ re = 0.0f; re1 = 0.0f; @@ -129,23 +128,17 @@ void mexFunction( re = sqrt(re)/sqrt(re1); if (re < epsil) count++; if (count > 3) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for(j=0; j 2) { if (re > re_old) { - Obj_func2D(A, D, P1, P2, lambda, dimX, dimY); - funcval = 0.0f; - for(j=0; j 3) { - Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for(j=0; j 2) { if (re > re_old) { - Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ); - funcval = 0.0f; - for(j=0; j +#include +#include +#include +#include +#include +#include "Diff4th_GPU_kernel.h" + +/* + * 2D and 3D CUDA implementation of the 4th order PDE denoising model by Hajiaboli + * + * Reference : + * "An anisotropic fourth-order diffusion filter for image noise removal" by M. Hajiaboli + * + * Example + * figure; + * Im = double(imread('lena_gray_256.tif'))/255; % loading image + * u0 = Im + .05*randn(size(Im)); % adding noise + * u = Diff4thHajiaboli_GPU(single(u0), 0.02, 150); + * subplot (1,2,1); imshow(u0,[ ]); title('Noisy Image') + * subplot (1,2,2); imshow(u,[ ]); title('Denoised Image') + * + * + * Linux/Matlab compilation: + * compile in terminal: nvcc -Xcompiler -fPIC -shared -o Diff4th_GPU_kernel.o Diff4th_GPU_kernel.cu + * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart Diff4thHajiaboli_GPU.cpp Diff4th_GPU_kernel.o + */ + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) +{ + int numdims, dimZ, size; + float *A, *B, *A_L, *B_L; + const int *dims; + + numdims = mxGetNumberOfDimensions(prhs[0]); + dims = mxGetDimensions(prhs[0]); + + float sigma = (float)mxGetScalar(prhs[1]); /* edge-preserving parameter */ + float lambda = (float)mxGetScalar(prhs[2]); /* regularization parameter */ + int iter = (int)mxGetScalar(prhs[3]); /* iterations number */ + + if (numdims == 2) { + + int N, M, Z, i, j; + Z = 0; // for the 2D case + float tau = 0.01; // time step is sufficiently small for an explicit methods + + /*Input data*/ + A = (float*)mxGetData(prhs[0]); + N = dims[0] + 2; + M = dims[1] + 2; + A_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); + B_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); + + /*Output data*/ + B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(dims[0], dims[1], mxSINGLE_CLASS, mxREAL)); + + // copy A to the bigger A_L with boundaries + #pragma omp parallel for shared(A_L, A) private(i,j) + for (i=0; i < N; i++) { + for (j=0; j < M; j++) { + if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1))) A_L[i*M+j] = A[(i-1)*(dims[1])+(j-1)]; + }} + + // Running CUDA code here + Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda); + + // copy the processed B_L to a smaller B + #pragma omp parallel for shared(B_L, B) private(i,j) + for (i=0; i < N; i++) { + for (j=0; j < M; j++) { + if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1))) B[(i-1)*(dims[1])+(j-1)] = B_L[i*M+j]; + }} + } + if (numdims == 3) { + // 3D image denoising / regularization + int N, M, Z, i, j, k; + float tau = 0.0007; // Time Step is small for an explicit methods + A = (float*)mxGetData(prhs[0]); + N = dims[0] + 2; + M = dims[1] + 2; + Z = dims[2] + 2; + int N_dims[] = {N, M, Z}; + A_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); + B_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); + + /* output data */ + B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL)); + + // copy A to the bigger A_L with boundaries + #pragma omp parallel for shared(A_L, A) private(i,j,k) + for (i=0; i < N; i++) { + for (j=0; j < M; j++) { + for (k=0; k < Z; k++) { + if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1)) && ((k > 0) && (k < Z-1))) { + A_L[(N*M)*(k)+(i)*M+(j)] = A[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)]; + }}}} + + // Running CUDA kernel here for diffusivity + Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda); + + // copy the processed B_L to a smaller B + #pragma omp parallel for shared(B_L, B) private(i,j,k) + for (i=0; i < N; i++) { + for (j=0; j < M; j++) { + for (k=0; k < Z; k++) { + if (((i > 0) && (i < N-1)) && ((j > 0) && (j < M-1)) && ((k > 0) && (k < Z-1))) { + B[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)] = B_L[(N*M)*(k)+(i)*M+(j)]; + }}}} + } +} \ No newline at end of file diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu new file mode 100644 index 0000000..178af00 --- /dev/null +++ b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu @@ -0,0 +1,270 @@ +#include +#include +#include +#include "Diff4th_GPU_kernel.h" + +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) +#define sizeT (sizeX*sizeY*sizeZ) +#define epsilon 0.00000001 + +///////////////////////////////////////////////// +// 2D Image denosing - Second Step (The second derrivative) +__global__ void Diff4th2D_derriv(float* B, float* A, float *A0, int N, int M, float sigma, int iter, float tau, float lambda) +{ + float gradXXc = 0, gradYYc = 0; + int i = blockIdx.x*blockDim.x + threadIdx.x; + int j = blockIdx.y*blockDim.y + threadIdx.y; + + int index = j + i*N; + + if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) { + return; } + + int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index; + int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index; + int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index; + int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index; + + gradXXc = B[indexN] + B[indexS] - 2*B[index] ; + gradYYc = B[indexW] + B[indexE] - 2*B[index] ; + A[index] = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc)); +} + +// 2D Image denosing - The First Step +__global__ void Diff4th2D(float* A, float* B, int N, int M, float sigma, int iter, float tau) +{ + float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, sq_sum, xy_2, V_norm, V_orth, c, c_sq; + + int i = blockIdx.x*blockDim.x + threadIdx.x; + int j = blockIdx.y*blockDim.y + threadIdx.y; + + int index = j + i*N; + + V_norm = 0.0f; V_orth = 0.0f; + + if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) { + return; } + + int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index; + int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index; + int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index; + int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index; + int indexNW = (j-1)+(i-1)*(N); if (A[indexNW] == 0) indexNW = index; + int indexNE = (j+1)+(i-1)*(N); if (A[indexNE] == 0) indexNE = index; + int indexWS = (j-1)+(i+1)*(N); if (A[indexWS] == 0) indexWS = index; + int indexES = (j+1)+(i+1)*(N); if (A[indexES] == 0) indexES = index; + + gradX = 0.5f*(A[indexN]-A[indexS]); + gradX_sq = gradX*gradX; + gradXX = A[indexN] + A[indexS] - 2*A[index]; + + gradY = 0.5f*(A[indexW]-A[indexE]); + gradY_sq = gradY*gradY; + gradYY = A[indexW] + A[indexE] - 2*A[index]; + + gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]); + xy_2 = 2.0f*gradX*gradY*gradXY; + sq_sum = gradX_sq + gradY_sq; + + if (sq_sum <= epsilon) { + V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/epsilon; + V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/epsilon; } + else { + V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/sq_sum; + V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/sq_sum; } + + c = 1.0f/(1.0f + sq_sum/sigma); + c_sq = c*c; + B[index] = c_sq*V_norm + c*V_orth; +} + +///////////////////////////////////////////////// +// 3D data parocerssing +__global__ void Diff4th3D_derriv(float *B, float *A, float *A0, int N, int M, int Z, float sigma, int iter, float tau, float lambda) +{ + float gradXXc = 0, gradYYc = 0, gradZZc = 0; + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + int zIndex = blockDim.z * blockIdx.z + threadIdx.z; + + int index = xIndex + M*yIndex + N*M*zIndex; + + if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) { + return; } + + int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index; + int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index; + int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index; + int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index; + int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index; + int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index; + + gradXXc = B[indexN] + B[indexS] - 2*B[index] ; + gradYYc = B[indexW] + B[indexE] - 2*B[index] ; + gradZZc = B[indexU] + B[indexD] - 2*B[index] ; + + A[index] = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc + gradZZc)); +} + +__global__ void Diff4th3D(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau) +{ + float gradX, gradX_sq, gradY, gradY_sq, gradZ, gradZ_sq, gradXX, gradYY, gradZZ, gradXY, gradXZ, gradYZ, sq_sum, xy_2, xyz_1, xyz_2, V_norm, V_orth, c, c_sq; + + int xIndex = blockDim.x * blockIdx.x + threadIdx.x; + int yIndex = blockDim.y * blockIdx.y + threadIdx.y; + int zIndex = blockDim.z * blockIdx.z + threadIdx.z; + + int index = xIndex + M*yIndex + N*M*zIndex; + V_norm = 0.0f; V_orth = 0.0f; + + if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) { + return; } + + B[index] = 0; + + int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index; + int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index; + int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index; + int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index; + int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index; + int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index; + + int indexNW = (xIndex-1) + M*(yIndex-1) + N*M*zIndex; if (A[indexNW] == 0) indexNW = index; + int indexNE = (xIndex-1) + M*(yIndex+1) + N*M*zIndex; if (A[indexNE] == 0) indexNE = index; + int indexWS = (xIndex+1) + M*(yIndex-1) + N*M*zIndex; if (A[indexWS] == 0) indexWS = index; + int indexES = (xIndex+1) + M*(yIndex+1) + N*M*zIndex; if (A[indexES] == 0) indexES = index; + + int indexUW = (xIndex-1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUW] == 0) indexUW = index; + int indexUE = (xIndex+1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUE] == 0) indexUE = index; + int indexDW = (xIndex-1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDW] == 0) indexDW = index; + int indexDE = (xIndex+1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDE] == 0) indexDE = index; + + int indexUN = (xIndex) + M*(yIndex-1) + N*M*(zIndex-1); if (A[indexUN] == 0) indexUN = index; + int indexUS = (xIndex) + M*(yIndex+1) + N*M*(zIndex-1); if (A[indexUS] == 0) indexUS = index; + int indexDN = (xIndex) + M*(yIndex-1) + N*M*(zIndex+1); if (A[indexDN] == 0) indexDN = index; + int indexDS = (xIndex) + M*(yIndex+1) + N*M*(zIndex+1); if (A[indexDS] == 0) indexDS = index; + + gradX = 0.5f*(A[indexN]-A[indexS]); + gradX_sq = gradX*gradX; + gradXX = A[indexN] + A[indexS] - 2*A[index]; + + gradY = 0.5f*(A[indexW]-A[indexE]); + gradY_sq = gradY*gradY; + gradYY = A[indexW] + A[indexE] - 2*A[index]; + + gradZ = 0.5f*(A[indexU]-A[indexD]); + gradZ_sq = gradZ*gradZ; + gradZZ = A[indexU] + A[indexD] - 2*A[index]; + + gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]); + gradXZ = 0.25f*(A[indexUW] - A[indexUE] - A[indexDW] + A[indexDE]); + gradYZ = 0.25f*(A[indexUN] - A[indexUS] - A[indexDN] + A[indexDS]); + + xy_2 = 2.0f*gradX*gradY*gradXY; + xyz_1 = 2.0f*gradX*gradZ*gradXZ; + xyz_2 = 2.0f*gradY*gradZ*gradYZ; + + sq_sum = gradX_sq + gradY_sq + gradZ_sq; + + if (sq_sum <= epsilon) { + V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/epsilon; + V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/epsilon; } + else { + V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/sq_sum; + V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/sq_sum; } + + c = 1; + if ((1.0f + sq_sum/sigma) != 0.0f) {c = 1.0f/(1.0f + sq_sum/sigma);} + + c_sq = c*c; + B[index] = c_sq*V_norm + c*V_orth; +} + +/******************************************************/ +/********* HOST FUNCTION*************/ +extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + + int BLKXSIZE, BLKYSIZE,BLKZSIZE; + float *Ad, *Bd, *Cd; + sigma = sigma*sigma; + + if (Z == 0){ + // 4th order diffusion for 2D case + BLKXSIZE = 8; + BLKYSIZE = 16; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); + + checkCudaErrors(cudaMalloc((void**)&Ad,N*M*sizeof(float))); + checkCudaErrors(cudaMalloc((void**)&Bd,N*M*sizeof(float))); + checkCudaErrors(cudaMalloc((void**)&Cd,N*M*sizeof(float))); + + checkCudaErrors(cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(Bd,A,N*M*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(Cd,A,N*M*sizeof(float),cudaMemcpyHostToDevice)); + + int n = 1; + while (n <= iter) { + Diff4th2D<<>>(Bd, Cd, N, M, sigma, iter, tau); + cudaDeviceSynchronize(); + checkCudaErrors( cudaPeekAtLastError() ); + Diff4th2D_derriv<<>>(Cd, Bd, Ad, N, M, sigma, iter, tau, lambda); + cudaDeviceSynchronize(); + checkCudaErrors( cudaPeekAtLastError() ); + n++; + } + checkCudaErrors(cudaMemcpy(B,Bd,N*M*sizeof(float),cudaMemcpyDeviceToHost)); + cudaFree(Ad); cudaFree(Bd); cudaFree(Cd); + } + + if (Z != 0){ + // 4th order diffusion for 3D case + BLKXSIZE = 8; + BLKYSIZE = 8; + BLKZSIZE = 8; + + dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE); + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE)); + + checkCudaErrors(cudaMalloc((void**)&Ad,N*M*Z*sizeof(float))); + checkCudaErrors(cudaMalloc((void**)&Bd,N*M*Z*sizeof(float))); + checkCudaErrors(cudaMalloc((void**)&Cd,N*M*Z*sizeof(float))); + + checkCudaErrors(cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(Bd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(Cd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice)); + + int n = 1; + while (n <= iter) { + Diff4th3D<<>>(Bd, Cd, N, M, Z, sigma, iter, tau); + cudaDeviceSynchronize(); + checkCudaErrors( cudaPeekAtLastError() ); + Diff4th3D_derriv<<>>(Cd, Bd, Ad, N, M, Z, sigma, iter, tau, lambda); + cudaDeviceSynchronize(); + checkCudaErrors( cudaPeekAtLastError() ); + n++; + } + checkCudaErrors(cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost)); + cudaFree(Ad); cudaFree(Bd); cudaFree(Cd); + } +} \ No newline at end of file diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h new file mode 100644 index 0000000..cfbb45a --- /dev/null +++ b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h @@ -0,0 +1,6 @@ +#ifndef __DIFF_HO_H_ +#define __DIFF_HO_H_ + +extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda); + +#endif diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp b/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp new file mode 100644 index 0000000..ff0cc90 --- /dev/null +++ b/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp @@ -0,0 +1,171 @@ +#include "mex.h" +#include +#include +#include +#include +#include +#include +#include "NLM_GPU_kernel.h" + +/* CUDA implementation of the patch-based (PB) regularization for 2D and 3D images/volumes + * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function + * + * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" + * 2. Kazantsev D. at. all "4D-CT reconstruction with unified spatial-temporal patch-based regularization" + * + * Input Parameters (mandatory): + * 1. Image/volume (2D/3D) + * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) + * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) + * 4. h - parameter for the PB penalty function + * 5. lambda - regularization parameter + + * Output: + * 1. regularized (denoised) Image/volume (N x N x N) + * + * In matlab check what kind of GPU you have with "gpuDevice" command, + * then set your ComputeCapability, here I use -arch compute_35 + * + * Quick 2D denoising example in Matlab: + Im = double(imread('lena_gray_256.tif'))/255; % loading image + u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise + ImDen = NLM_GPU(single(u0), 3, 2, 0.15, 1); + + * Linux/Matlab compilation: + * compile in terminal: nvcc -Xcompiler -fPIC -shared -o NLM_GPU_kernel.o NLM_GPU_kernel.cu + * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart NLM_GPU.cpp NLM_GPU_kernel.o + * + * D. Kazantsev + * 2014-17 + * Harwell/Manchester UK + */ + +float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); + +void mexFunction( + int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) +{ + int N, M, Z, i_n, j_n, k_n, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop, count, SearchW_full, SimilW_full; + const int *dims; + float *A, *B=NULL, *Ap=NULL, *Bp=NULL, *Eucl_Vec, h, h2, lambda, val, denh2; + + numdims = mxGetNumberOfDimensions(prhs[0]); + dims = mxGetDimensions(prhs[0]); + + N = dims[0]; + M = dims[1]; + Z = dims[2]; + + if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");} + if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); } + + if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter"); + + /*Handling inputs*/ + A = (float *) mxGetData(prhs[0]); /* the image to regularize/filter */ + SearchW_real = (int) mxGetScalar(prhs[1]); /* the searching window ratio */ + SimilW = (int) mxGetScalar(prhs[2]); /* the similarity window ratio */ + h = (float) mxGetScalar(prhs[3]); /* parameter for the PB filtering function */ + lambda = (float) mxGetScalar(prhs[4]); + + if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0"); + + SearchW = SearchW_real + 2*SimilW; + + SearchW_full = 2*SearchW + 1; /* the full searching window size */ + SimilW_full = 2*SimilW + 1; /* the full similarity window size */ + h2 = h*h; + + padXY = SearchW + 2*SimilW; /* padding sizes */ + newsizeX = N + 2*(padXY); /* the X size of the padded array */ + newsizeY = M + 2*(padXY); /* the Y size of the padded array */ + newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */ + int N_dims[] = {newsizeX, newsizeY, newsizeZ}; + + /******************************2D case ****************************/ + if (numdims == 2) { + /*Handling output*/ + B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL)); + /*allocating memory for the padded arrays */ + Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL)); + Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL)); + Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL)); + + /*Gaussian kernel */ + count = 0; + for(i_n=-SimilW; i_n<=SimilW; i_n++) { + for(j_n=-SimilW; j_n<=SimilW; j_n++) { + val = (float)(i_n*i_n + j_n*j_n)/(2*SimilW*SimilW); + Eucl_Vec[count] = exp(-val); + count = count + 1; + }} /*main neighb loop */ + + /**************************************************************************/ + /*Perform padding of image A to the size of [newsizeX * newsizeY] */ + switchpad_crop = 0; /*padding*/ + pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop); + + /* Do PB regularization with the padded array */ + NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, 0, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda); + + switchpad_crop = 1; /*cropping*/ + pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop); + } + else + { + /******************************3D case ****************************/ + /*Handling output*/ + B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL)); + /*allocating memory for the padded arrays */ + Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); + Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL)); + Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL)); + + /*Gaussian kernel */ + count = 0; + for(i_n=-SimilW; i_n<=SimilW; i_n++) { + for(j_n=-SimilW; j_n<=SimilW; j_n++) { + for(k_n=-SimilW; k_n<=SimilW; k_n++) { + val = (float)(i_n*i_n + j_n*j_n + k_n*k_n)/(2*SimilW*SimilW*SimilW); + Eucl_Vec[count] = exp(-val); + count = count + 1; + }}} /*main neighb loop */ + /**************************************************************************/ + /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */ + switchpad_crop = 0; /*padding*/ + pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); + + /* Do PB regularization with the padded array */ + NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, newsizeZ, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda); + + switchpad_crop = 1; /*cropping*/ + pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop); + } /*end else ndims*/ +} + +float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop) +{ + /* padding-cropping function */ + int i,j,k; + if (NewSizeZ > 1) { + for (i=0; i < NewSizeX; i++) { + for (j=0; j < NewSizeY; j++) { + for (k=0; k < NewSizeZ; k++) { + if (((i >= padXY) && (i < NewSizeX-padXY)) && ((j >= padXY) && (j < NewSizeY-padXY)) && ((k >= padXY) && (k < NewSizeZ-padXY))) { + if (switchpad_crop == 0) Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)]; + else Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j]; + } + }}} + } + else { + for (i=0; i < NewSizeX; i++) { + for (j=0; j < NewSizeY; j++) { + if (((i >= padXY) && (i < NewSizeX-padXY)) && ((j >= padXY) && (j < NewSizeY-padXY))) { + if (switchpad_crop == 0) Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)]; + else Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j]; + } + }} + } + return *Ap; +} \ No newline at end of file diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu new file mode 100644 index 0000000..17da3a8 --- /dev/null +++ b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu @@ -0,0 +1,239 @@ +#include +#include +#include +#include "NLM_GPU_kernel.h" + +#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) + +inline void __checkCudaErrors(cudaError err, const char *file, const int line) +{ + if (cudaSuccess != err) + { + fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", + file, line, (int)err, cudaGetErrorString(err)); + exit(EXIT_FAILURE); + } +} + +extern __shared__ float sharedmem[]; + +// run PB den kernel here +__global__ void NLM_kernel(float *Ad, float* Bd, float *Eucl_Vec_d, int N, int M, int Z, int SearchW, int SimilW, int SearchW_real, int SearchW_full, int SimilW_full, int padXY, float h2, float lambda, dim3 imagedim, dim3 griddim, dim3 kerneldim, dim3 sharedmemdim, int nUpdatePerThread, float neighborsize) +{ + + int i1, j1, k1, i2, j2, k2, i3, j3, k3, i_l, j_l, k_l, count; + float value, Weight_norm, normsum, Weight; + + int bidx = blockIdx.x; + int bidy = blockIdx.y%griddim.y; + int bidz = (int)((blockIdx.y)/griddim.y); + + // global index for block endpoint + int beidx = __mul24(bidx,blockDim.x); + int beidy = __mul24(bidy,blockDim.y); + int beidz = __mul24(bidz,blockDim.z); + + int tid = __mul24(threadIdx.z,__mul24(blockDim.x,blockDim.y)) + + __mul24(threadIdx.y,blockDim.x) + threadIdx.x; + + #ifdef __DEVICE_EMULATION__ + printf("tid : %d", tid); + #endif + + // update shared memory + int nthreads = blockDim.x*blockDim.y*blockDim.z; + int sharedMemSize = sharedmemdim.x * sharedmemdim.y * sharedmemdim.z; + for(int i=0; i= padXY && idx < (imagedim.x - padXY) && + idy >= padXY && idy < (imagedim.y - padXY)) + { + int i_centr = threadIdx.x + (SearchW); /*indices of the centrilized (main) pixel */ + int j_centr = threadIdx.y + (SearchW); /*indices of the centrilized (main) pixel */ + + if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M)) { + + Weight_norm = 0; value = 0.0; + /* Massive Search window loop */ + for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) { + for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) { + /* if inside the searching window */ + count = 0; normsum = 0.0; + for(i_l=-SimilW; i_l<=SimilW; i_l++) { + for(j_l=-SimilW; j_l<=SimilW; j_l++) { + i2 = i1+i_l; j2 = j1+j_l; + i3 = i_centr+i_l; j3 = j_centr+j_l; /*coordinates of the inner patch loop */ + if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M)) { + if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M)) { + normsum += Eucl_Vec_d[count]*pow((sharedmem[(j3)*sharedmemdim.x+(i3)] - sharedmem[j2*sharedmemdim.x+i2]), 2); + }} + count++; + }} + if (normsum != 0) Weight = (expf(-normsum/h2)); + else Weight = 0.0; + Weight_norm += Weight; + value += sharedmem[j1*sharedmemdim.x+i1]*Weight; + }} + + if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm; + else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx]; + } + } /*boundary conditions end*/ + } + else { + /*3D case*/ + /*checking boundaries to be within the image and avoid padded spaces */ + if( idx >= padXY && idx < (imagedim.x - padXY) && + idy >= padXY && idy < (imagedim.y - padXY) && + idz >= padXY && idz < (imagedim.z - padXY) ) + { + int i_centr = threadIdx.x + SearchW; /*indices of the centrilized (main) pixel */ + int j_centr = threadIdx.y + SearchW; /*indices of the centrilized (main) pixel */ + int k_centr = threadIdx.z + SearchW; /*indices of the centrilized (main) pixel */ + + if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M) && (k_centr > 0) && (k_centr < Z)) { + + Weight_norm = 0; value = 0.0; + /* Massive Search window loop */ + for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) { + for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) { + for(k1 = k_centr - SearchW_real ; k1<= k_centr + SearchW_real ; k1++) { + /* if inside the searching window */ + count = 0; normsum = 0.0; + for(i_l=-SimilW; i_l<=SimilW; i_l++) { + for(j_l=-SimilW; j_l<=SimilW; j_l++) { + for(k_l=-SimilW; k_l<=SimilW; k_l++) { + i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l; + i3 = i_centr+i_l; j3 = j_centr+j_l; k3 = k_centr+k_l; /*coordinates of the inner patch loop */ + if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M) && (k2 > 0) && (k2 < Z)) { + if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M) && (k3 > 0) && (k3 < Z)) { + normsum += Eucl_Vec_d[count]*pow((sharedmem[(k3)*sharedmemdim.x*sharedmemdim.y + (j3)*sharedmemdim.x+(i3)] - sharedmem[(k2)*sharedmemdim.x*sharedmemdim.y + j2*sharedmemdim.x+i2]), 2); + }} + count++; + }}} + if (normsum != 0) Weight = (expf(-normsum/h2)); + else Weight = 0.0; + Weight_norm += Weight; + value += sharedmem[k1*sharedmemdim.x*sharedmemdim.y + j1*sharedmemdim.x+i1]*Weight; + }}} /* BIG search window loop end*/ + + + if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm; + else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx]; + } + } /* boundary conditions end */ + } +} + +///////////////////////////////////////////////// +// HOST FUNCTION +extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float h2, float lambda) +{ + int deviceCount = -1; // number of devices + cudaGetDeviceCount(&deviceCount); + if (deviceCount == 0) { + fprintf(stderr, "No CUDA devices found\n"); + return; + } + +// cudaDeviceReset(); + + int padXY, SearchW_full, SimilW_full, blockWidth, blockHeight, blockDepth, nBlockX, nBlockY, nBlockZ, kernel_depth; + float *Ad, *Bd, *Eucl_Vec_d; + + if (dimension == 2) { + blockWidth = 16; + blockHeight = 16; + blockDepth = 1; + Z = 1; + kernel_depth = 0; + } + else { + blockWidth = 8; + blockHeight = 8; + blockDepth = 8; + kernel_depth = SearchW; + } + + // compute how many blocks are needed + nBlockX = ceil((float)N / (float)blockWidth); + nBlockY = ceil((float)M / (float)blockHeight); + nBlockZ = ceil((float)Z / (float)blockDepth); + + dim3 dimGrid(nBlockX,nBlockY*nBlockZ); + dim3 dimBlock(blockWidth, blockHeight, blockDepth); + dim3 imagedim(N,M,Z); + dim3 griddim(nBlockX,nBlockY,nBlockZ); + + dim3 kerneldim(SearchW,SearchW,kernel_depth); + dim3 sharedmemdim((SearchW*2)+blockWidth,(SearchW*2)+blockHeight,(kernel_depth*2)+blockDepth); + int sharedmemsize = sizeof(float)*sharedmemdim.x*sharedmemdim.y*sharedmemdim.z; + int updateperthread = ceil((float)(sharedmemdim.x*sharedmemdim.y*sharedmemdim.z)/(float)(blockWidth*blockHeight*blockDepth)); + float neighborsize = (2*SearchW+1)*(2*SearchW+1)*(2*kernel_depth+1); + + padXY = SearchW + 2*SimilW; /* padding sizes */ + + SearchW_full = 2*SearchW + 1; /* the full searching window size */ + SimilW_full = 2*SimilW + 1; /* the full similarity window size */ + + /*allocate space for images on device*/ + checkCudaErrors( cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)) ); + /*allocate space for vectors on device*/ + if (dimension == 2) { + checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*sizeof(float)) ); + checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); + } + else { + checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*SimilW_full*sizeof(float)) ); + checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); + } + + /* copy data from the host to device */ + checkCudaErrors( cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice) ); + + // Run CUDA kernel here + NLM_kernel<<>>(Ad, Bd, Eucl_Vec_d, M, N, Z, SearchW, SimilW, SearchW_real, SearchW_full, SimilW_full, padXY, h2, lambda, imagedim, griddim, kerneldim, sharedmemdim, updateperthread, neighborsize); + + checkCudaErrors( cudaPeekAtLastError() ); +// gpuErrchk( cudaDeviceSynchronize() ); + + checkCudaErrors( cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost) ); + cudaFree(Ad); cudaFree(Bd); cudaFree(Eucl_Vec_d); +} diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h new file mode 100644 index 0000000..bc9d4a3 --- /dev/null +++ b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h @@ -0,0 +1,6 @@ +#ifndef __NLMREG_KERNELS_H_ +#define __NLMREG_KERNELS_H_ + +extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambda); + +#endif -- cgit v1.2.3 From 6fb8f5d188ed31d7a7077cba8ab7aea17b25b8bf Mon Sep 17 00:00:00 2001 From: algol Date: Mon, 11 Sep 2017 12:23:44 +0100 Subject: StudentT fidelity added, can be skipped during pythonising --- main_func/FISTA_REC.m | 56 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 9 deletions(-) (limited to 'main_func') diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m index edccbe1..6987dca 100644 --- a/main_func/FISTA_REC.m +++ b/main_func/FISTA_REC.m @@ -19,7 +19,8 @@ function [X, output] = FISTA_REC(params) % - .iterFISTA (iterations for the main loop, default 40) % - .L_const (Lipschitz constant, default Power method) ) % - .X_ideal (ideal image, if given) -% - .weights (statisitcal weights, size of the sinogram) +% - .weights (statisitcal weights for the PWLS model, size of the sinogram) +% - .fidelity (use 'studentt' fidelity) % - .ROI (Region-of-interest, only if X_ideal is given) % - .initialize (a 'warm start' using SIRT method from ASTRA) %----------------Regularization choices------------------------ @@ -92,6 +93,15 @@ if (isfield(params,'weights')) else weights = ones(size(sino)); end +if (isfield(params,'fidelity')) + studentt = 0; + if (strcmp(params.fidelity,'studentt') == 1) + studentt = 1; + lambdaR_L1 = 0; + end +else + studentt = 0; +end if (isfield(params,'L_const')) L_const = params.L_const; else @@ -357,12 +367,25 @@ if (subsets == 0) vec = squeeze(vec(:,1,:)); end r = r_x - (1./L_const).*vec; - else - % no ring removal + objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output + else + if (studentt == 1) + % artifacts removal with Students t penalty + residual = weights.*(sino_updt - sino); + for kkk = 1:SlicesZ + res_vec = reshape(residual(:,:,kkk), Detectors*anglesNumb, 1); % 1D vectorized sinogram + %s = 100; + %gr = (2)*res_vec./(s*2 + conj(res_vec).*res_vec); + [ff, gr] = studentst(res_vec, 1); + residual(:,:,kkk) = reshape(gr, Detectors, anglesNumb); + end + objective(i) = ff; % for the objective function output + else + % no ring removal (LS model) residual = weights.*(sino_updt - sino); - end - - objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output + objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output + end + end % if the geometry is parallel use slice-by-slice projection-backprojection routine if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d')) @@ -549,9 +572,24 @@ else end r = r_x - (1./L_const).*vec; else - [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom); - % no ring removal - residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:))); + [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom); + + if (studentt == 1) + % artifacts removal with Students t penalty + residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:))); + + for kkk = 1:SlicesZ + res_vec = reshape(residual(:,:,kkk), Detectors*numProjSub, 1); % 1D vectorized sinogram + %s = 100; + %gr = (2)*res_vec./(s*2 + conj(res_vec).*res_vec); + [ff, gr] = studentst(res_vec, 1); + residual(:,:,kkk) = reshape(gr, Detectors, numProjSub); + end + objective(i) = ff; % for the objective function output + else + % no ring removal (LS model) + residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:))); + end end [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geomSUB, vol_geom); -- cgit v1.2.3 From 33da5e128e51e5adc1f6085b4772b55b2cd76e2e Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Wed, 2 Aug 2017 16:51:14 +0100 Subject: added utils --- main_func/regularizers_CPU/FGP_TV_core.h | 30 ++++++++++++++++++++++- main_func/regularizers_CPU/LLT_model_core.h | 3 ++- main_func/regularizers_CPU/SplitBregman_TV_core.h | 28 ++++++++++++++++++++- main_func/regularizers_CPU/TGV_PD_core.h | 3 ++- 4 files changed, 60 insertions(+), 4 deletions(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h index 697fd84..d256c9a 100644 --- a/main_func/regularizers_CPU/FGP_TV_core.h +++ b/main_func/regularizers_CPU/FGP_TV_core.h @@ -23,8 +23,36 @@ limitations under the License. #include #include #include "omp.h" +#include "utils.h" -float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); +/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case) +* +* Input Parameters: +* 1. Noisy image/volume [REQUIRED] +* 2. lambda - regularization parameter [REQUIRED] +* 3. Number of iterations [OPTIONAL parameter] +* 4. eplsilon: tolerance constant [OPTIONAL parameter] +* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] +* +* Output: +* [1] Filtered/regularized image +* [2] last function value +* +* Example of image denoising: +* figure; +* Im = double(imread('lena_gray_256.tif'))/255; % loading image +* u0 = Im + .05*randn(size(Im)); % adding noise +* u = FGP_TV(single(u0), 0.05, 100, 1e-04); +* +* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +* This function is based on the Matlab's code and paper by +* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems" +* +* D. Kazantsev, 2016-17 +* +*/ + +//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY); diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h index 10f52fe..560bb9c 100644 --- a/main_func/regularizers_CPU/LLT_model_core.h +++ b/main_func/regularizers_CPU/LLT_model_core.h @@ -23,6 +23,7 @@ limitations under the License. #include #include #include "omp.h" +#include "utils.h" #define EPS 0.01 @@ -36,4 +37,4 @@ float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned s float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ); float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ); -float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); +//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h index a7aaabb..78aef09 100644 --- a/main_func/regularizers_CPU/SplitBregman_TV_core.h +++ b/main_func/regularizers_CPU/SplitBregman_TV_core.h @@ -23,7 +23,33 @@ limitations under the License. #include #include "omp.h" -float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); +#include "utils.h" + +/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D) +* +* Input Parameters: +* 1. Noisy image/volume +* 2. lambda - regularization parameter +* 3. Number of iterations [OPTIONAL parameter] +* 4. eplsilon - tolerance constant [OPTIONAL parameter] +* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter] +* +* Output: +* Filtered/regularized image +* +* Example: +* figure; +* Im = double(imread('lena_gray_256.tif'))/255; % loading image +* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; +* u = SplitBregman_TV(single(u0), 10, 30, 1e-04); +* +* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +* References: +* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher. +* D. Kazantsev, 2016* +*/ + +//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu); float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h index 04ba95c..cbe7ea4 100644 --- a/main_func/regularizers_CPU/TGV_PD_core.h +++ b/main_func/regularizers_CPU/TGV_PD_core.h @@ -23,6 +23,7 @@ limitations under the License. #include #include #include "omp.h" +#include "utils.h" /* 2D functions */ float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma); @@ -32,4 +33,4 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, fl float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau); float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau); float newU(float *U, float *U_old, int dimX, int dimY, int dimZ); -float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); +//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); -- cgit v1.2.3 From 068380a2a2c84b46e486f59c397329f53a2eae1f Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Wed, 2 Aug 2017 16:51:37 +0100 Subject: Added utils.c utils.h a few regularizers defined the same copyIm function. I moved it into this new common utils. --- main_func/regularizers_CPU/utils.c | 29 +++++++++++++++++++++++++++++ main_func/regularizers_CPU/utils.h | 27 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 main_func/regularizers_CPU/utils.c create mode 100644 main_func/regularizers_CPU/utils.h (limited to 'main_func') diff --git a/main_func/regularizers_CPU/utils.c b/main_func/regularizers_CPU/utils.c new file mode 100644 index 0000000..0e83d2c --- /dev/null +++ b/main_func/regularizers_CPU/utils.c @@ -0,0 +1,29 @@ +/* +This work is part of the Core Imaging Library developed by +Visual Analytics and Imaging System Group of the Science Technology +Facilities Council, STFC + +Copyright 2017 Daniil Kazanteev +Copyright 2017 Srikanth Nagella, Edoardo Pasca + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include "utils.h" + +/* Copy Image */ +float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) +{ + int j; +#pragma omp parallel for shared(A, U) private(j) + for (j = 0; j +#include +#include +#include +#include +#include "omp.h" + +float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); -- cgit v1.2.3 From 846e09d5c27736a4af80af7f9a3adcda3c6d4bf0 Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Fri, 4 Aug 2017 16:12:27 +0100 Subject: fix typo and add include "matrix.h" --- main_func/regularizers_CPU/FGP_TV.c | 3 ++- main_func/regularizers_CPU/LLT_model.c | 10 ++++++++-- main_func/regularizers_CPU/SplitBregman_TV.c | 3 ++- main_func/regularizers_CPU/SplitBregman_TV_core.c | 12 +----------- 4 files changed, 13 insertions(+), 15 deletions(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c index cfe5b9e..66442c9 100644 --- a/main_func/regularizers_CPU/FGP_TV.c +++ b/main_func/regularizers_CPU/FGP_TV.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,6 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "matrix.h" #include "mex.h" #include "FGP_TV_core.h" diff --git a/main_func/regularizers_CPU/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c index 47146a5..0050654 100644 --- a/main_func/regularizers_CPU/LLT_model.c +++ b/main_func/regularizers_CPU/LLT_model.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,12 +18,19 @@ limitations under the License. */ #include "mex.h" +#include "matrix.h" #include "LLT_model_core.h" /* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty * * Input Parameters: * 1. U0 - origanal noise image/volume +======= +/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty +* +* Input Parameters: +* 1. U0 - original noise image/volume +>>>>>>> fix typo and add include "matrix.h" * 2. lambda - regularization parameter * 3. tau - time-step for explicit scheme * 4. iter - iterations number @@ -46,7 +53,6 @@ limitations under the License. * 28.11.16/Harwell */ - void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) diff --git a/main_func/regularizers_CPU/SplitBregman_TV.c b/main_func/regularizers_CPU/SplitBregman_TV.c index 0dc638d..38f6a9d 100644 --- a/main_func/regularizers_CPU/SplitBregman_TV.c +++ b/main_func/regularizers_CPU/SplitBregman_TV.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,7 @@ limitations under the License. */ #include "mex.h" +#include #include "SplitBregman_TV_core.h" /* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D) diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.c b/main_func/regularizers_CPU/SplitBregman_TV_core.c index 283dd43..4109a4b 100644 --- a/main_func/regularizers_CPU/SplitBregman_TV_core.c +++ b/main_func/regularizers_CPU/SplitBregman_TV_core.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -257,13 +257,3 @@ float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *B }}} return 1; } -/* General Functions */ -/*****************************************************************/ -/* Copy Image */ -float copyIm(float *A, float *B, int dimX, int dimY, int dimZ) -{ - int j; -#pragma omp parallel for shared(A, B) private(j) - for(j=0; j Date: Fri, 4 Aug 2017 16:13:19 +0100 Subject: removed definition of CopyImage --- main_func/regularizers_CPU/FGP_TV_core.c | 13 ++----------- main_func/regularizers_CPU/LLT_model_core.c | 11 ++--------- 2 files changed, 4 insertions(+), 20 deletions(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/FGP_TV_core.c b/main_func/regularizers_CPU/FGP_TV_core.c index 9cde327..03cd445 100644 --- a/main_func/regularizers_CPU/FGP_TV_core.c +++ b/main_func/regularizers_CPU/FGP_TV_core.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -263,13 +263,4 @@ float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, return 1; } -/* General Functions */ -/*****************************************************************/ -/* Copy Image */ -float copyIm(float *A, float *B, int dimX, int dimY, int dimZ) -{ - int j; -#pragma omp parallel for shared(A, B) private(j) - for (j = 0; j Date: Fri, 4 Aug 2017 16:14:24 +0100 Subject: extern C function called by C++ adds extern "C" to the definition of the functions if the code is called from C++ rather than from C/Matlab --- main_func/regularizers_CPU/FGP_TV_core.h | 11 ++++-- main_func/regularizers_CPU/LLT_model_core.h | 10 ++++-- main_func/regularizers_CPU/PatchBased_Regul_core.h | 40 +++++++++++++++++++++- main_func/regularizers_CPU/SplitBregman_TV_core.h | 12 +++++-- main_func/regularizers_CPU/TGV_PD_core.h | 35 +++++++++++++++++-- 5 files changed, 98 insertions(+), 10 deletions(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h index d256c9a..7e7229a 100644 --- a/main_func/regularizers_CPU/FGP_TV_core.h +++ b/main_func/regularizers_CPU/FGP_TV_core.h @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include +//#include #include #include #include @@ -51,7 +51,9 @@ limitations under the License. * D. Kazantsev, 2016-17 * */ - +#ifdef __cplusplus +extern "C" { +#endif //float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY); @@ -64,3 +66,6 @@ float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ); float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ); float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ); +#ifdef __cplusplus +} +#endif diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h index 560bb9c..13fce5a 100644 --- a/main_func/regularizers_CPU/LLT_model_core.h +++ b/main_func/regularizers_CPU/LLT_model_core.h @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include +//#include #include #include #include @@ -28,6 +28,9 @@ limitations under the License. #define EPS 0.01 /* 2D functions */ +#ifdef __cplusplus +extern "C" { +#endif float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ); float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau); @@ -38,3 +41,6 @@ float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ); float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ); //float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.h b/main_func/regularizers_CPU/PatchBased_Regul_core.h index 5aa6415..d4a8a46 100644 --- a/main_func/regularizers_CPU/PatchBased_Regul_core.h +++ b/main_func/regularizers_CPU/PatchBased_Regul_core.h @@ -19,13 +19,51 @@ limitations under the License. #define _USE_MATH_DEFINES -#include +//#include #include #include #include #include #include "omp.h" +/* C-OMP implementation of patch-based (PB) regularization (2D and 3D cases). +* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function +* +* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems" +* 2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization" +* +* Input Parameters (mandatory): +* 1. Image (2D or 3D) +* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) +* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) +* 4. h - parameter for the PB penalty function +* 5. lambda - regularization parameter + +* Output: +* 1. regularized (denoised) Image (N x N)/volume (N x N x N) +* +* Quick 2D denoising example in Matlab: +Im = double(imread('lena_gray_256.tif'))/255; % loading image +u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise +ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); +* +* Please see more tests in a file: +TestTemporalSmoothing.m + +* +* Matlab + C/mex compilers needed +* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp" +* +* D. Kazantsev * +* 02/07/2014 +* Harwell, UK +*/ +#ifdef __cplusplus +extern "C" { +#endif float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop); float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda); float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda); +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h index 78aef09..6ed3ff9 100644 --- a/main_func/regularizers_CPU/SplitBregman_TV_core.h +++ b/main_func/regularizers_CPU/SplitBregman_TV_core.h @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +//#include #include #include #include @@ -49,6 +49,10 @@ limitations under the License. * D. Kazantsev, 2016* */ +#ifdef __cplusplus +extern "C" { +#endif + //float copyIm(float *A, float *B, int dimX, int dimY, int dimZ); float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu); float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda); @@ -59,3 +63,7 @@ float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda); float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda); float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h index cbe7ea4..d5378df 100644 --- a/main_func/regularizers_CPU/TGV_PD_core.h +++ b/main_func/regularizers_CPU/TGV_PD_core.h @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,7 +17,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include +//#include #include #include #include @@ -25,6 +25,34 @@ limitations under the License. #include "omp.h" #include "utils.h" +/* C-OMP implementation of Primal-Dual denoising method for +* Total Generilized Variation (TGV)-L2 model (2D case only) +* +* Input Parameters: +* 1. Noisy image/volume (2D) +* 2. lambda - regularization parameter +* 3. parameter to control first-order term (alpha1) +* 4. parameter to control the second-order term (alpha0) +* 5. Number of CP iterations +* +* Output: +* Filtered/regularized image +* +* Example: +* figure; +* Im = double(imread('lena_gray_256.tif'))/255; % loading image +* u0 = Im + .03*randn(size(Im)); % adding noise +* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc; +* +* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp" +* References: +* K. Bredies "Total Generalized Variation" +* +* 28.11.16/Harwell +*/ +#ifdef __cplusplus +extern "C" { +#endif /* 2D functions */ float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma); float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1); @@ -34,3 +62,6 @@ float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau); float newU(float *U, float *U_old, int dimX, int dimY, int dimZ); //float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); +#ifdef __cplusplus +} +#endif -- cgit v1.2.3 From c9834452a55de7e55301fa95ffdf8768694400b3 Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Fri, 4 Aug 2017 16:14:41 +0100 Subject: extern C fix --- main_func/regularizers_CPU/utils.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/utils.h b/main_func/regularizers_CPU/utils.h index c720006..53463a3 100644 --- a/main_func/regularizers_CPU/utils.h +++ b/main_func/regularizers_CPU/utils.h @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,11 +17,16 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include -#include +//#include +//#include #include #include -#include +//#include #include "omp.h" - +#ifdef __cplusplus +extern "C" { +#endif float copyIm(float *A, float *U, int dimX, int dimY, int dimZ); +#ifdef __cplusplus +} +#endif -- cgit v1.2.3 From 927e1caa9b1d0df9f4f2eb95170f6bd8be657656 Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Fri, 4 Aug 2017 16:48:41 +0100 Subject: typo and include matrix.h --- main_func/regularizers_CPU/PatchBased_Regul.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c index 59eb3b3..9c925df 100644 --- a/main_func/regularizers_CPU/PatchBased_Regul.c +++ b/main_func/regularizers_CPU/PatchBased_Regul.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +18,7 @@ limitations under the License. */ #include "mex.h" +#include "matrix.h" #include "PatchBased_Regul_core.h" -- cgit v1.2.3 From c20554d5bec0168a54a6c472c0cc2f378511eccc Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Mon, 7 Aug 2017 16:46:51 +0100 Subject: typo --- main_func/regularizers_CPU/TGV_PD.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c index 6593d8e..c9cb440 100644 --- a/main_func/regularizers_CPU/TGV_PD.c +++ b/main_func/regularizers_CPU/TGV_PD.c @@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazanteev +Copyright 2017 Daniil Kazantsev Copyright 2017 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); -- cgit v1.2.3 From 8094acfff8659e5c959066495936a8d5aff56fd5 Mon Sep 17 00:00:00 2001 From: Edoardo Pasca Date: Mon, 7 Aug 2017 16:47:28 +0100 Subject: removed copyIm --- main_func/regularizers_CPU/TGV_PD_core.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'main_func') diff --git a/main_func/regularizers_CPU/TGV_PD_core.c b/main_func/regularizers_CPU/TGV_PD_core.c index 1164b73..4139d10 100644 --- a/main_func/regularizers_CPU/TGV_PD_core.c +++ b/main_func/regularizers_CPU/TGV_PD_core.c @@ -186,14 +186,6 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, }} return 1; } -/* Copy Image */ -float copyIm(float *A, float *U, int dimX, int dimY, int dimZ) -{ - int j; -#pragma omp parallel for shared(A, U) private(j) - for(j=0; j