From cbeb8a1174498751e38a3de8cd6fe55abae20192 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Thu, 3 Aug 2017 00:10:53 +0100
Subject: some cleaning inside C functions, updated mex_compile file and the
 work on ordered-subset FISTA started

---
 main_func/FISTA_REC.m                              | 40 ++++++++++++-
 main_func/compile_mex.m                            | 11 ++--
 main_func/regularizers_CPU/FGP_TV_core.c           | 26 +++++++++
 main_func/regularizers_CPU/FGP_TV_core.h           | 27 ---------
 main_func/regularizers_CPU/LLT_model.c             | 26 +++++++++
 main_func/regularizers_CPU/LLT_model_core.c        | 25 ++++++++
 main_func/regularizers_CPU/LLT_model_core.h        | 25 --------
 main_func/regularizers_CPU/PatchBased_Regul.c      | 43 +++++++-------
 main_func/regularizers_CPU/PatchBased_Regul_core.c | 57 ++++++++----------
 main_func/regularizers_CPU/PatchBased_Regul_core.h | 33 -----------
 main_func/regularizers_CPU/SplitBregman_TV_core.c  |  1 -
 main_func/regularizers_CPU/SplitBregman_TV_core.h  | 24 --------
 main_func/regularizers_CPU/TGV_PD.c                | 68 +++++-----------------
 main_func/regularizers_CPU/TGV_PD_core.c           |  1 -
 main_func/regularizers_CPU/TGV_PD_core.h           | 29 ---------
 15 files changed, 180 insertions(+), 256 deletions(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index 2823691..1e93719 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -99,7 +99,7 @@ else
         clear proj_geomT vol_geomT
     else
         % divergen beam geometry
-        fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry...');
+        fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry... will take some time!');
         niter = 8; % number of iteration for PM
         x1 = rand(N,N,SlicesZ);
         sqweight = sqrt(weights);
@@ -216,6 +216,39 @@ if (isfield(params,'initialize'))
 else
     X = zeros(N,N,SlicesZ, 'single'); % storage for the solution
 end
+if (isfield(params,'OS'))
+    % Ordered Subsets reorganisation of data and angles
+    OS = 1;
+    subsets = 8;
+    angles = proj_geom.ProjectionAngles;
+    binEdges = linspace(min(angles),max(angles),subsets+1);
+    
+    % assign values to bins
+    [binsDiscr,~] = histc(angles, [binEdges(1:end-1) Inf]);
+    
+    % rearrange angles into subsets
+    AnglesReorg = zeros(length(angles),1);
+    SinoReorg = zeros(Detectors, anglesNumb, SlicesZ, 'single');
+    
+    counterM = 0;
+    for ii = 1:max(binsDiscr(:))
+        counter = 0;
+        for jj = 1:subsets
+            curr_index = ii+jj-1 + counter;
+            if (binsDiscr(jj) >= ii)
+                counterM = counterM + 1;
+                AnglesReorg(counterM) = angles(curr_index);
+                SinoReorg(:,counterM,:) = squeeze(sino(:,curr_index,:));
+            end
+            counter = (counter + binsDiscr(jj)) - 1;
+        end
+    end
+    sino = SinoReorg;
+    clear SinoReorg;
+else 
+    OS = 0; % normal FISTA
+end
+
 
 %----------------Reconstruction part------------------------
 Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given)
@@ -241,7 +274,7 @@ for i = 1:iterFISTA
         % ring removal part (Group-Huber fidelity)
         for kkk = 1:anglesNumb
             residual(:,kkk,:) =  squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
-        end        
+        end
         vec = sum(residual,2);
         if (SlicesZ > 1)
             vec = squeeze(vec(:,1,:));
@@ -249,7 +282,7 @@ for i = 1:iterFISTA
         r = r_x - (1./L_const).*vec;
     else
         % no ring removal
-        residual = weights.*(sino_updt - sino);        
+        residual = weights.*(sino_updt - sino);
     end
     
     objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
@@ -305,5 +338,6 @@ end
 output.Resid_error = Resid_error;
 output.objective = objective;
 output.L_const = L_const;
+output.sino = sino;
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 end
diff --git a/main_func/compile_mex.m b/main_func/compile_mex.m
index 7bfa8eb..66c05da 100644
--- a/main_func/compile_mex.m
+++ b/main_func/compile_mex.m
@@ -1,10 +1,11 @@
-% compile mex's once
+% compile mex's in Matlab once
 cd regularizers_CPU/
 
-mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex LLT_model.c LLT_model_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex FGP_TV.c FGP_TV_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex SplitBregman_TV.c SplitBregman_TV_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex PatchBased_Regul.c PatchBased_Regul_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 
 cd ../../
 cd demos
diff --git a/main_func/regularizers_CPU/FGP_TV_core.c b/main_func/regularizers_CPU/FGP_TV_core.c
index c383a71..a55991c 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.c
+++ b/main_func/regularizers_CPU/FGP_TV_core.c
@@ -19,6 +19,32 @@ limitations under the License.
 
 #include "FGP_TV_core.h"
 
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. eplsilon: tolerance constant [OPTIONAL parameter]
+ * 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ * [2] last function value 
+ *
+ * Example of image denoising:
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); % adding noise
+ * u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ *
+ * D. Kazantsev, 2016-17
+ *
+ */
+
 /* 2D-case related Functions */
 /*****************************************************************/
 float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h
index 8d6af3e..e5328fb 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.h
+++ b/main_func/regularizers_CPU/FGP_TV_core.h
@@ -24,33 +24,6 @@ limitations under the License.
 #include <stdio.h>
 #include "omp.h"
 
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
-*
-* Input Parameters:
-* 1. Noisy image/volume [REQUIRED]
-* 2. lambda - regularization parameter [REQUIRED]
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon: tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* [1] Filtered/regularized image
-* [2] last function value
-*
-* Example of image denoising:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); % adding noise
-* u = FGP_TV(single(u0), 0.05, 100, 1e-04);
-*
-* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* This function is based on the Matlab's code and paper by
-* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
-*
-* D. Kazantsev, 2016-17
-*
-*/
-
 float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
 float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
 float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
diff --git a/main_func/regularizers_CPU/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c
index 6b50d33..47146a5 100644
--- a/main_func/regularizers_CPU/LLT_model.c
+++ b/main_func/regularizers_CPU/LLT_model.c
@@ -20,6 +20,32 @@ limitations under the License.
 #include "mex.h"
 #include "LLT_model_core.h"
 
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
+*
+* Input Parameters:
+* 1. U0 - origanal noise image/volume
+* 2. lambda - regularization parameter
+* 3. tau - time-step  for explicit scheme
+* 4. iter - iterations number
+* 5. epsil  - tolerance constant (to terminate earlier)
+* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .03*randn(size(Im)); % adding noise
+* [Den] = LLT_model(single(u0), 10, 0.1, 1);
+*
+*
+* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
+*
+* 28.11.16/Harwell
+*/
+
 
 void mexFunction(
         int nlhs, mxArray *plhs[],
diff --git a/main_func/regularizers_CPU/LLT_model_core.c b/main_func/regularizers_CPU/LLT_model_core.c
index 3098269..7a1cdbe 100644
--- a/main_func/regularizers_CPU/LLT_model_core.c
+++ b/main_func/regularizers_CPU/LLT_model_core.c
@@ -19,6 +19,31 @@ limitations under the License.
 
 #include "LLT_model_core.h"
 
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
+*
+* Input Parameters:
+* 1. U0 - origanal noise image/volume
+* 2. lambda - regularization parameter
+* 3. tau - time-step  for explicit scheme
+* 4. iter - iterations number
+* 5. epsil  - tolerance constant (to terminate earlier)
+* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .03*randn(size(Im)); % adding noise
+* [Den] = LLT_model(single(u0), 10, 0.1, 1);
+*
+* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
+*
+* 28.11.16/Harwell
+*/
+
+
 float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ)
 {
 	int i, j, i_p, i_m, j_m, j_p;
diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h
index ef8803d..10f52fe 100644
--- a/main_func/regularizers_CPU/LLT_model_core.h
+++ b/main_func/regularizers_CPU/LLT_model_core.h
@@ -26,31 +26,6 @@ limitations under the License.
 
 #define EPS 0.01
 
-/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
-*
-* Input Parameters:
-* 1. U0 - origanal noise image/volume
-* 2. lambda - regularization parameter
-* 3. tau - time-step  for explicit scheme
-* 4. iter - iterations number
-* 5. epsil  - tolerance constant (to terminate earlier)
-* 6. switcher - default is 0, switch to (1) to restrictive smoothing in Z dimension (in test)
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* [Den] = LLT_model(single(u0), 10, 0.1, 1);
-*
-*
-* to compile with OMP support: mex LLT_model.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References: Lysaker, Lundervold and Tai (LLT) 2003, IEEE
-*
-* 28.11.16/Harwell
-*/
 /* 2D functions */
 float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
 float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c
index 24ee210..59eb3b3 100644
--- a/main_func/regularizers_CPU/PatchBased_Regul.c
+++ b/main_func/regularizers_CPU/PatchBased_Regul.c
@@ -27,27 +27,23 @@ limitations under the License.
  * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
  *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
  *
- * Input Parameters (mandatory):
- * 1. Image (2D or 3D)
- * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
- * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
- * 4. h - parameter for the PB penalty function
- * 5. lambda - regularization parameter 
+ * Input Parameters:
+ * 1. Image (2D or 3D) [required]
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
+ * 4. h - parameter for the PB penalty function [optional]
+ * 5. lambda - regularization parameter  [optional]
 
  * Output:
  * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
  *
- * Quick 2D denoising example in Matlab:   
+ * 2D denoising example in Matlab:   
    Im = double(imread('lena_gray_256.tif'))/255;  % loading image
    u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-   ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05); 
- *
- * Please see more tests in a file: 
-   TestTemporalSmoothing.m
- 
+   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
  *
  * Matlab + C/mex compilers needed
- * to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
+ * to compile with OMP support: mex PatchBased_Regul.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
  *
  * D. Kazantsev *
  * 02/07/2014
@@ -70,17 +66,23 @@ void mexFunction(
     M = dims[1];
     Z = dims[2];
     
-    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
+    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input is 2D image or 3D volume");}
     if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
     
     if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
     
     /*Handling inputs*/
-    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
-    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
-    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
-    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
-    lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
+    A  = (float *) mxGetData(prhs[0]);    /* the image/volume to regularize/filter */
+    SearchW_real = 3; /*default value*/
+    SimilW = 1; /*default value*/
+    h = 0.1; 
+    lambda = 0.1;
+    
+    if ((nrhs == 2) || (nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
+    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5))   SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
+    if ((nrhs == 4) || (nrhs == 5))  h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
+    if ((nrhs == 5))  lambda = (float) mxGetScalar(prhs[4]); /* regularization parameter */   
+
 
     if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
     if (lambda <= 0) mexErrMsgTxt(" Regularization parmeter should be > 0");
@@ -89,7 +91,6 @@ void mexFunction(
     
     /* SearchW_full = 2*SearchW + 1; */ /* the full searching window  size */
     /* SimilW_full = 2*SimilW + 1;  */  /* the full similarity window  size */
-
     
     padXY = SearchW + 2*SimilW; /* padding sizes */
     newsizeX = N + 2*(padXY); /* the X size of the padded array */
@@ -135,4 +136,4 @@ void mexFunction(
         switchpad_crop = 1; /*cropping*/
         pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
     } /*end else ndims*/ 
-}    
\ No newline at end of file
+}    
diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.c b/main_func/regularizers_CPU/PatchBased_Regul_core.c
index 6f0a48d..acfb464 100644
--- a/main_func/regularizers_CPU/PatchBased_Regul_core.c
+++ b/main_func/regularizers_CPU/PatchBased_Regul_core.c
@@ -19,40 +19,33 @@ limitations under the License.
 
 #include "PatchBased_Regul_core.h"
 
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases).
-* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
-*
-* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
-*             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
-*
-* Input Parameters (mandatory):
-* 1. Image (2D or 3D)
-* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
-* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
-* 4. h - parameter for the PB penalty function
-* 5. lambda - regularization parameter
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases). 
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters:
+ * 1. Image (2D or 3D) [required]
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window) [optional]
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window) [optional]
+ * 4. h - parameter for the PB penalty function [optional]
+ * 5. lambda - regularization parameter  [optional]
 
-* Output:
-* 1. regularized (denoised) Image (N x N)/volume (N x N x N)
-*
-* Quick 2D denoising example in Matlab:
-Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05);
-*
-* Please see more tests in a file:
-TestTemporalSmoothing.m
-
-*
-* Matlab + C/mex compilers needed
-* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
-*
-* D. Kazantsev *
-* 02/07/2014
-* Harwell, UK
-*/
+ * Output:
+ * 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+ *
+ * 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = PatchBased_Regul(single(u0), 3, 1, 0.08, 0.05); 
+ 
+ * D. Kazantsev *
+ * 02/07/2014
+ * Harwell, UK
+ */
 
-/*2D version*/
+/*2D version function */
 float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda)
 {
     int i, j, i_n, j_n, i_m, j_m, i_p, j_p, i_l, j_l, i1, j1, i2, j2, i3, j3, i5,j5, count, SimilW_full;
diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.h b/main_func/regularizers_CPU/PatchBased_Regul_core.h
index b83cf10..5aa6415 100644
--- a/main_func/regularizers_CPU/PatchBased_Regul_core.h
+++ b/main_func/regularizers_CPU/PatchBased_Regul_core.h
@@ -26,39 +26,6 @@ limitations under the License.
 #include <stdio.h>
 #include "omp.h"
 
-/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases).
-* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
-*
-* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
-*             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
-*
-* Input Parameters (mandatory):
-* 1. Image (2D or 3D)
-* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
-* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
-* 4. h - parameter for the PB penalty function
-* 5. lambda - regularization parameter
-
-* Output:
-* 1. regularized (denoised) Image (N x N)/volume (N x N x N)
-*
-* Quick 2D denoising example in Matlab:
-Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
-ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05);
-*
-* Please see more tests in a file:
-TestTemporalSmoothing.m
-
-*
-* Matlab + C/mex compilers needed
-* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
-*
-* D. Kazantsev *
-* 02/07/2014
-* Harwell, UK
-*/
-
 float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
 float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
 float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.c b/main_func/regularizers_CPU/SplitBregman_TV_core.c
index 26ad5b1..283dd43 100644
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.c
+++ b/main_func/regularizers_CPU/SplitBregman_TV_core.c
@@ -37,7 +37,6 @@ limitations under the License.
 * u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
 * u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
 *
-* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 * References:
 * The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
 * D. Kazantsev, 2016*
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h
index ed9112c..a7aaabb 100644
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.h
+++ b/main_func/regularizers_CPU/SplitBregman_TV_core.h
@@ -23,30 +23,6 @@ limitations under the License.
 #include <stdio.h>
 #include "omp.h"
 
-/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
-*
-* Input Parameters:
-* 1. Noisy image/volume
-* 2. lambda - regularization parameter
-* 3. Number of iterations [OPTIONAL parameter]
-* 4. eplsilon - tolerance constant [OPTIONAL parameter]
-* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
-* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
-*
-* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References:
-* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
-* D. Kazantsev, 2016*
-*/
-
 float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
 float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
 float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c
index 8bce18a..d4bb787 100644
--- a/main_func/regularizers_CPU/TGV_PD.c
+++ b/main_func/regularizers_CPU/TGV_PD.c
@@ -39,7 +39,7 @@ limitations under the License.
  * u0 = Im + .03*randn(size(Im)); % adding noise
  * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
  *
- * to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * to compile with OMP support: mex TGV_PD.c  TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
  * References:
  * K. Bredies "Total Generalized Variation"
  *
@@ -53,7 +53,7 @@ void mexFunction(
 {
     int number_of_dims, iter, dimX, dimY, dimZ, ll;
     const int  *dim_array;
-    float *A, *U, *U_old, *P1, *P2, *P3, *Q1, *Q2, *Q3, *Q4, *Q5, *Q6, *Q7, *Q8, *Q9, *V1, *V1_old, *V2, *V2_old, *V3, *V3_old, lambda, L2, tau, sigma,  alpha1, alpha0;
+    float *A, *U, *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, lambda, L2, tau, sigma,  alpha1, alpha0;
     
     number_of_dims = mxGetNumberOfDimensions(prhs[0]);
     dim_array = mxGetDimensions(prhs[0]);
@@ -88,40 +88,10 @@ void mexFunction(
         V1 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
         V1_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
         V2 = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   }
-    else if (number_of_dims == 3) {
-        mexErrMsgTxt("The input data should be 2D");
-        /*3D case*/
-//         dimZ = dim_array[2];
-//         U = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         
-//         P1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         P2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         P3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         
-//         Q1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q4 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q5 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q6 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q7 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q8 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         Q9 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         
-//         U_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         
-//         V1 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         V1_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         V2 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         V2_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         V3 = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-//         V3_old = (float*)mxGetPr(mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));	  
-    }
-    else {mexErrMsgTxt("The input data should be 2D");}
-    
-    
-    /*printf("%i \n", i);*/
+        V2_old = (float*)mxGetPr(mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));   
+        
+        
+         /*printf("%i \n", i);*/
     L2 = 12.0; /*Lipshitz constant*/
     tau = 1.0/pow(L2,0.5);
     sigma = 1.0/pow(L2,0.5);
@@ -129,7 +99,6 @@ void mexFunction(
     /*Copy A to U*/
     copyIm(A, U, dimX, dimY, dimZ);
     
-    if (number_of_dims == 2) {
         /* Here primal-dual iterations begin for 2D */
         for(ll = 0; ll < iter; ll++) {
             
@@ -164,23 +133,12 @@ void mexFunction(
             /*get new V*/
             newU(V1, V1_old, dimX, dimY, dimZ);
             newU(V2, V2_old, dimX, dimY, dimZ);
-        } /*end of iterations*/
+        } /*end of iterations*/        
     }
-    
-//     /*3D version*/
-//     if (number_of_dims == 3) {
-//         /* Here primal-dual iterations begin for 3D */
-//         for(ll = 0; ll < iter; ll++) {
-//             
-//             /* Calculate Dual Variable P */
-//             DualP_3D(U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
-//             
-//             /*Projection onto convex set for P*/
-//             ProjP_3D(P1, P2, P3, dimX, dimY, dimZ, alpha1);
-//             
-//             /* Calculate Dual Variable Q */
-//             DualQ_3D(V1, V2, V2, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, dimX, dimY, dimZ, sigma);
-//             
-//         } /*end of iterations*/
-//     }
+    else if (number_of_dims == 3) {
+        mexErrMsgTxt("The input data should be a 2D array");
+        /*3D case*/
+    }
+    else {mexErrMsgTxt("The input data should be a 2D array");}    
+   
 }
diff --git a/main_func/regularizers_CPU/TGV_PD_core.c b/main_func/regularizers_CPU/TGV_PD_core.c
index 4c0427c..1164b73 100644
--- a/main_func/regularizers_CPU/TGV_PD_core.c
+++ b/main_func/regularizers_CPU/TGV_PD_core.c
@@ -38,7 +38,6 @@ limitations under the License.
  * u0 = Im + .03*randn(size(Im)); % adding noise
  * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
  *
- * to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
  * References:
  * K. Bredies "Total Generalized Variation"
  *
diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h
index e25ae68..04ba95c 100644
--- a/main_func/regularizers_CPU/TGV_PD_core.h
+++ b/main_func/regularizers_CPU/TGV_PD_core.h
@@ -24,32 +24,6 @@ limitations under the License.
 #include <stdio.h>
 #include "omp.h"
 
-/* C-OMP implementation of Primal-Dual denoising method for
-* Total Generilized Variation (TGV)-L2 model (2D case only)
-*
-* Input Parameters:
-* 1. Noisy image/volume (2D)
-* 2. lambda - regularization parameter
-* 3. parameter to control first-order term (alpha1)
-* 4. parameter to control the second-order term (alpha0)
-* 5. Number of CP iterations
-*
-* Output:
-* Filtered/regularized image
-*
-* Example:
-* figure;
-* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
-* u0 = Im + .03*randn(size(Im)); % adding noise
-* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
-*
-* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
-* References:
-* K. Bredies "Total Generalized Variation"
-*
-* 28.11.16/Harwell
-*/
-
 /* 2D functions */
 float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
 float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
@@ -57,8 +31,5 @@ float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX,
 float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float alpha0);
 float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
 float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
-/*3D functions*/
-float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma);
-
 float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
 float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-- 
cgit v1.2.3


From e412e73b172a99776d108d3b4c1f8662a20e9fce Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Thu, 3 Aug 2017 00:26:46 +0100
Subject: 2D or 3D regularization choices added

---
 main_func/FISTA_REC.m | 114 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 93 insertions(+), 21 deletions(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index 1e93719..43ed0cb 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -3,6 +3,7 @@ function [X,  output] = FISTA_REC(params)
 % <<<< FISTA-based reconstruction algorithm using ASTRA-toolbox >>>>
 % ___Input___:
 % params.[] file:
+%----------------General Parameters------------------------
 %       - .proj_geom (geometry of the projector) [required]
 %       - .vol_geom (geometry of the reconstructed object) [required]
 %       - .sino (vectorized in 2D or 3D sinogram) [required]
@@ -13,12 +14,19 @@ function [X,  output] = FISTA_REC(params)
 %       - .ROI (Region-of-interest, only if X_ideal is given)
 %       - .initialize (a 'warm start' using SIRT method from ASTRA)
 %----------------Regularization choices------------------------
-%       - .Regul_Lambda_FGPTV (FGP-TV regularization parameter)
-%       - .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter)
-%       - .Regul_Lambda_TVLLT (Higher order SB-LLT regularization parameter)
+%       1 .Regul_Lambda_FGPTV (FGP-TV regularization parameter)
+%       2 .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter)
+%       3 .Regul_LambdaLLT (Higher order LLT regularization parameter)
+%          3.1 .Regul_tauLLT (time step parameter for LLT (HO) term)
+%       4 .Regul_LambdaPatchBased (Patch-based nonlocal regularization parameter)
+%                 4.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
+%                 4.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
+%                 4.3  .Regul_PB_h (PB penalty function threshold)
+%       5 .Regul_LambdaTGV (Total Generalized variation regularization parameter)
 %       - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04)
 %       - .Regul_Iterations (iterations for the selected penalty, default 25)
-%       - .Regul_tauLLT (time step parameter for LLT term)
+%       - .Regul_Dimension ('2D' or '3D' way to apply regularization, '3D' is the default)
+%----------------Ring removal------------------------
 %       - .Ring_LambdaR_L1 (regularization parameter for L1-ring minimization, if lambdaR_L1 > 0 then switch on ring removal)
 %       - .Ring_Alpha (larger values can accelerate convergence but check stability, default 1)
 %----------------Visualization parameters------------------------
@@ -150,8 +158,8 @@ if (isfield(params,'Regul_Iterations'))
 else
     IterationsRegul = 25;
 end
-if (isfield(params,'Regul_LambdaHO'))
-    lambdaHO = params.Regul_LambdaHO;
+if (isfield(params,'Regul_LambdaLLT'))
+    lambdaHO = params.Regul_LambdaLLT;
 else
     lambdaHO = 0;
 end
@@ -165,6 +173,26 @@ if (isfield(params,'Regul_tauLLT'))
 else
     tauHO = 0.0001;
 end
+if (isfield(params,'Regul_LambdaPatchBased'))
+    lambdaPB = params.Regul_LambdaPatchBased;
+else
+    lambdaPB = 0;
+end
+if (isfield(params,'Regul_PB_SearchW'))
+    SearchW = params.Regul_PB_SearchW;
+else
+    SearchW = 3; % default
+end
+if (isfield(params,'Regul_PB_SimilW'))
+    SimilW = params.Regul_PB_SimilW;
+else
+    SimilW = 1; % default
+end
+if (isfield(params,'Regul_PB_h'))
+    h_PB = params.Regul_PB_h;
+else
+    h_PB = 0.1; % default
+end
 if (isfield(params,'Ring_LambdaR_L1'))
     lambdaR_L1 = params.Ring_LambdaR_L1;
 else
@@ -175,6 +203,14 @@ if (isfield(params,'Ring_Alpha'))
 else
     alpha_ring = 1;
 end
+if (isfield(params,'Regul_Dimension'))
+    Dimension = params.Regul_Dimension;
+    if ((strcmp('2D', Dimension) ~= 1) && (strcmp('3D', Dimension) ~= 1))
+        Dimension = '3D';
+    end
+else
+    Dimension = '3D';
+end
 if (isfield(params,'show'))
     show = params.show;
 else
@@ -293,21 +329,57 @@ for i = 1:iterFISTA
     astra_mex_data3d('delete', sino_id);
     astra_mex_data3d('delete', id);
     
-    if (lambdaFGP_TV > 0)
-        % FGP-TV regularization
-        [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
-        objective(i) = objective(i) + f_val;
-    end
-    if (lambdaSB_TV > 0)
-        % Split Bregman regularization
-        X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
-    end
-    if (lambdaHO > 0)
-        % Higher Order (LLT) regularization
-        X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
-        X = 0.5.*(X + X2); % averaged combination of two solutions
-    end
-    
+    % regularization
+     if (lambdaFGP_TV > 0)
+         % FGP-TV regularization
+        if ((strcmp('2D', Dimension) == 1))
+            % 2D regularization
+            for kkk = 1:SlicesZ
+                [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV, IterationsRegul, tol, 'iso');
+            end
+        else
+            % 3D regularization
+            [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
+        end
+         objective(i) = objective(i) + f_val;
+     end
+     if (lambdaSB_TV > 0)
+         % Split Bregman regularization
+        if ((strcmp('2D', Dimension) == 1))
+            % 2D regularization
+            for kkk = 1:SlicesZ
+                X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
+            end
+        else
+            % 3D regularization
+            X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
+        end
+     end
+     if (lambdaHO > 0)
+         % Higher Order (LLT) regularization
+        X2 = zeros(N,N,SlicesZ,'single');
+        if ((strcmp('2D', Dimension) == 1))
+            % 2D regularization
+            for kkk = 1:SlicesZ
+                X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
+            end
+        else
+            % 3D regularization
+            X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
+        end
+         X = 0.5.*(X + X2); % averaged combination of two solutions
+     end
+    if (lambdaPB > 0)
+        % Patch-Based regularization (can be slow on CPU)
+        if ((strcmp('2D', Dimension) == 1))
+            % 2D regularization
+            for kkk = 1:SlicesZ
+                X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB);
+            end
+        else
+            X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB);
+        end
+    end  
     
     
     if (lambdaR_L1 > 0)
-- 
cgit v1.2.3


From d7281b95f70dd3707f82640972a52f4155c2fde5 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Thu, 3 Aug 2017 15:25:50 +0100
Subject: Ordered Subset FISTA added and demos updated in DemoRD2

---
 main_func/FISTA_REC.m               | 404 +++++++++++++++++++++++++-----------
 main_func/regularizers_CPU/TGV_PD.c |   6 +-
 2 files changed, 291 insertions(+), 119 deletions(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index 43ed0cb..381fa34 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -1,6 +1,6 @@
 function [X,  output] = FISTA_REC(params)
 
-% <<<< FISTA-based reconstruction algorithm using ASTRA-toolbox >>>>
+% <<<< FISTA-based reconstruction routine using ASTRA-toolbox >>>>
 % ___Input___:
 % params.[] file:
 %----------------General Parameters------------------------
@@ -19,9 +19,9 @@ function [X,  output] = FISTA_REC(params)
 %       3 .Regul_LambdaLLT (Higher order LLT regularization parameter)
 %          3.1 .Regul_tauLLT (time step parameter for LLT (HO) term)
 %       4 .Regul_LambdaPatchBased (Patch-based nonlocal regularization parameter)
-%                 4.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
-%                 4.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
-%                 4.3  .Regul_PB_h (PB penalty function threshold)
+%          4.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
+%          4.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
+%          4.3  .Regul_PB_h (PB penalty function threshold)
 %       5 .Regul_LambdaTGV (Total Generalized variation regularization parameter)
 %       - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04)
 %       - .Regul_Iterations (iterations for the selected penalty, default 25)
@@ -193,6 +193,11 @@ if (isfield(params,'Regul_PB_h'))
 else
     h_PB = 0.1; % default
 end
+if (isfield(params,'Regul_LambdaTGV'))
+    LambdaTGV = params.Regul_LambdaTGV;
+else
+    LambdaTGV = 0.01;
+end
 if (isfield(params,'Ring_LambdaR_L1'))
     lambdaR_L1 = params.Ring_LambdaR_L1;
 else
@@ -252,20 +257,17 @@ if (isfield(params,'initialize'))
 else
     X = zeros(N,N,SlicesZ, 'single'); % storage for the solution
 end
-if (isfield(params,'OS'))
+if (isfield(params,'subsets'))
     % Ordered Subsets reorganisation of data and angles
-    OS = 1;
-    subsets = 8;
+    subsets = params.subsets; % subsets number
     angles = proj_geom.ProjectionAngles;
     binEdges = linspace(min(angles),max(angles),subsets+1);
     
     % assign values to bins
     [binsDiscr,~] = histc(angles, [binEdges(1:end-1) Inf]);
     
-    % rearrange angles into subsets
-    AnglesReorg = zeros(length(angles),1);
-    SinoReorg = zeros(Detectors, anglesNumb, SlicesZ, 'single');
-    
+    % get rearranged subset indices
+    IndicesReorg = zeros(length(angles),1);
     counterM = 0;
     for ii = 1:max(binsDiscr(:))
         counter = 0;
@@ -273,143 +275,313 @@ if (isfield(params,'OS'))
             curr_index = ii+jj-1 + counter;
             if (binsDiscr(jj) >= ii)
                 counterM = counterM + 1;
-                AnglesReorg(counterM) = angles(curr_index);
-                SinoReorg(:,counterM,:) = squeeze(sino(:,curr_index,:));
+                IndicesReorg(counterM) = curr_index;
             end
             counter = (counter + binsDiscr(jj)) - 1;
         end
     end
-    sino = SinoReorg;
-    clear SinoReorg;
-else 
-    OS = 0; % normal FISTA
+else
+    subsets = 0; % Classical FISTA
 end
 
-
 %----------------Reconstruction part------------------------
 Resid_error = zeros(iterFISTA,1); % errors vector (if the ground truth is given)
 objective = zeros(iterFISTA,1); % objective function values vector
 
-t = 1;
-X_t = X;
 
-r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
-r_x = r; % another ring variable
-residual = zeros(size(sino),'single');
-
-% Outer FISTA iterations loop
-for i = 1:iterFISTA
-    
-    X_old = X;
-    t_old = t;
-    r_old = r;
+if (subsets == 0)
+    % Classical FISTA
+    t = 1;
+    X_t = X;
     
-    [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
+    r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
+    r_x = r; % another ring variable
+    residual = zeros(size(sino),'single');
     
-    if (lambdaR_L1 > 0)
-        % ring removal part (Group-Huber fidelity)
-        for kkk = 1:anglesNumb
-            residual(:,kkk,:) =  squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
+    % Outer FISTA iterations loop
+    for i = 1:iterFISTA
+        
+        X_old = X;
+        t_old = t;
+        r_old = r;
+        
+        [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
+        
+        if (lambdaR_L1 > 0)
+            % the ring removal part (Group-Huber fidelity)
+            for kkk = 1:anglesNumb
+                residual(:,kkk,:) =  squeeze(weights(:,kkk,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
+            end
+            vec = sum(residual,2);
+            if (SlicesZ > 1)
+                vec = squeeze(vec(:,1,:));
+            end
+            r = r_x - (1./L_const).*vec;
+        else
+            % no ring removal
+            residual = weights.*(sino_updt - sino);
         end
-        vec = sum(residual,2);
-        if (SlicesZ > 1)
-            vec = squeeze(vec(:,1,:));
+        
+        objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
+        
+        [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
+        
+        X = X_t - (1/L_const).*x_temp;
+        astra_mex_data3d('delete', sino_id);
+        astra_mex_data3d('delete', id);
+        
+        % regularization
+        if (lambdaFGP_TV > 0)
+            % FGP-TV regularization
+            if ((strcmp('2D', Dimension) == 1))
+                % 2D regularization
+                for kkk = 1:SlicesZ
+                    [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV, IterationsRegul, tol, 'iso');
+                end
+            else
+                % 3D regularization
+                [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
+            end
+            objective(i) = objective(i) + f_val;
         end
-        r = r_x - (1./L_const).*vec;
-    else
-        % no ring removal
-        residual = weights.*(sino_updt - sino);
-    end
-    
-    objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
-    
-    [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
-    
-    X = X_t - (1/L_const).*x_temp;
-    astra_mex_data3d('delete', sino_id);
-    astra_mex_data3d('delete', id);
-    
-    % regularization
-     if (lambdaFGP_TV > 0)
-         % FGP-TV regularization
-        if ((strcmp('2D', Dimension) == 1))
-            % 2D regularization
-            for kkk = 1:SlicesZ
-                [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV, IterationsRegul, tol, 'iso');
+        if (lambdaSB_TV > 0)
+            % Split Bregman regularization
+            if ((strcmp('2D', Dimension) == 1))
+                % 2D regularization
+                for kkk = 1:SlicesZ
+                    X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
+                end
+            else
+                % 3D regularization
+                X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
             end
-        else
-            % 3D regularization
-            [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
         end
-         objective(i) = objective(i) + f_val;
-     end
-     if (lambdaSB_TV > 0)
-         % Split Bregman regularization
-        if ((strcmp('2D', Dimension) == 1))
-            % 2D regularization
-            for kkk = 1:SlicesZ
-                X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
+        if (lambdaHO > 0)
+            % Higher Order (LLT) regularization
+            X2 = zeros(N,N,SlicesZ,'single');
+            if ((strcmp('2D', Dimension) == 1))
+                % 2D regularization
+                for kkk = 1:SlicesZ
+                    X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
+                end
+            else
+                % 3D regularization
+                X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
             end
-        else
-            % 3D regularization
-            X = SplitBregman_TV(single(X), lambdaSB_TV, IterationsRegul, tol);  % (more memory efficent)
+            X = 0.5.*(X + X2); % averaged combination of two solutions
         end
-     end
-     if (lambdaHO > 0)
-         % Higher Order (LLT) regularization
-        X2 = zeros(N,N,SlicesZ,'single');
-        if ((strcmp('2D', Dimension) == 1))
-            % 2D regularization
-            for kkk = 1:SlicesZ
-                X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
+        if (lambdaPB > 0)
+            % Patch-Based regularization (can be slow on CPU)
+            if ((strcmp('2D', Dimension) == 1))
+                % 2D regularization
+                for kkk = 1:SlicesZ
+                    X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB);
+                end
+            else
+                X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB);
             end
-        else
-            % 3D regularization
-            X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
         end
-         X = 0.5.*(X + X2); % averaged combination of two solutions
-     end
-    if (lambdaPB > 0)
-        % Patch-Based regularization (can be slow on CPU)
-        if ((strcmp('2D', Dimension) == 1))
-            % 2D regularization
-            for kkk = 1:SlicesZ
-                X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB);
+        if (LambdaTGV > 0)
+                % Total Generalized variation (currently only 2D)
+                lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
+                lamTGV2 = 0.5; % second-order term
+                for kkk = 1:SlicesZ
+                X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul);
+                end
+        end
+        
+        if (lambdaR_L1 > 0)
+            r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
+        end
+        
+        t = (1 + sqrt(1 + 4*t^2))/2; % updating t
+        X_t = X + ((t_old-1)/t).*(X - X_old); % updating X
+        
+        if (lambdaR_L1 > 0)
+            r_x = r + ((t_old-1)/t).*(r - r_old); % updating r
+        end
+        
+        if (show == 1)
+            figure(10); imshow(X(:,:,slice), [0 maxvalplot]);
+            if (lambdaR_L1 > 0)
+                figure(11); plot(r); title('Rings offset vector')
             end
+            pause(0.01);
+        end
+        if (strcmp(X_ideal, 'none' ) == 0)
+            Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
+            fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
         else
-            X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB);
+            fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
         end
-    end  
-    
-    
-    if (lambdaR_L1 > 0)
-        r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
     end
+else
+    % Ordered Subsets (OS) FISTA reconstruction routine (normally one order of magnitude faster than classical)
+    t = 1;
+    X_t = X;
+    proj_geomSUB = proj_geom;
     
-    t = (1 + sqrt(1 + 4*t^2))/2; % updating t
-    X_t = X + ((t_old-1)/t).*(X - X_old); % updating X
     
-    if (lambdaR_L1 > 0)
-        r_x = r + ((t_old-1)/t).*(r - r_old); % updating r
-    end
+    r = zeros(Detectors,SlicesZ, 'single'); % 2D array (for 3D data) of sparse "ring" vectors
+    r_x = r; % another ring variable
+    residual2 = zeros(size(sino),'single');
     
-    if (show == 1)
-        figure(10); imshow(X(:,:,slice), [0 maxvalplot]);
-        if (lambdaR_L1 > 0)
-            figure(11); plot(r); title('Rings offset vector')
+    % Outer FISTA iterations loop
+    for i = 1:iterFISTA
+        
+        % With OS approach it becomes trickier to correlate independent subsets, hence additional work is required
+        % one solution is to work with a full sinogram at times
+        if ((i >= 3) && (lambdaR_L1 > 0))
+            [sino_id2, sino_updt2] = astra_create_sino3d_cuda(X, proj_geom, vol_geom);
+            astra_mex_data3d('delete', sino_id2);
+        end
+        
+        % subsets loop
+        counterInd = 1;
+        for ss = 1:subsets
+            X_old = X;
+            t_old = t;
+            r_old = r;
+            
+            numProjSub = binsDiscr(ss); % the number of projections per subset
+            CurrSubIndeces = IndicesReorg(counterInd:(counterInd + numProjSub - 1)); % extract indeces attached to the subset
+            proj_geomSUB.ProjectionAngles = angles(CurrSubIndeces);
+            
+            if (lambdaR_L1 > 0)
+                
+                % the ring removal part (Group-Huber fidelity)
+                % first 2 iterations do additional work reconstructing whole dataset to ensure
+                % stablility
+                if (i < 3)
+                    [sino_id2, sino_updt2] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
+                    astra_mex_data3d('delete', sino_id2);
+                else
+                    [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom);
+                end
+                
+                for kkk = 1:anglesNumb
+                    residual2(:,kkk,:) = squeeze(weights(:,kkk,:)).*(squeeze(sino_updt2(:,kkk,:)) - (squeeze(sino(:,kkk,:)) - alpha_ring.*r_x));
+                end
+                
+                residual = zeros(Detectors, numProjSub, SlicesZ,'single');
+                for kkk = 1:numProjSub
+                    indC = CurrSubIndeces(kkk);
+                    if (i < 3)
+                        residual(:,kkk,:) =  squeeze(residual2(:,indC,:));
+                    else
+                        residual(:,kkk,:) =  squeeze(weights(:,indC,:)).*(squeeze(sino_updt(:,kkk,:)) - (squeeze(sino(:,indC,:)) - alpha_ring.*r_x));
+                    end
+                end
+                vec = sum(residual2,2);
+                if (SlicesZ > 1)
+                    vec = squeeze(vec(:,1,:));
+                end
+                r = r_x - (1./L_const).*vec;
+            else
+                [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom);
+                % no ring removal
+                residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:)));
+            end
+            
+            [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geomSUB, vol_geom);
+            
+            X = X_t - (1/L_const).*x_temp;
+            astra_mex_data3d('delete', sino_id);
+            astra_mex_data3d('delete', id);
+            
+            % regularization
+            if (lambdaFGP_TV > 0)
+                % FGP-TV regularization
+                if ((strcmp('2D', Dimension) == 1))
+                    % 2D regularization
+                    for kkk = 1:SlicesZ
+                        [X(:,:,kkk), f_val] = FGP_TV(single(X(:,:,kkk)), lambdaFGP_TV/subsets, IterationsRegul, tol, 'iso');
+                    end
+                else
+                    % 3D regularization
+                    [X, f_val] = FGP_TV(single(X), lambdaFGP_TV/subsets, IterationsRegul, tol, 'iso');
+                end
+                objective(i) = objective(i) + f_val;
+            end
+            if (lambdaSB_TV > 0)
+                % Split Bregman regularization
+                if ((strcmp('2D', Dimension) == 1))
+                    % 2D regularization
+                    for kkk = 1:SlicesZ
+                        X(:,:,kkk) = SplitBregman_TV(single(X(:,:,kkk)), lambdaSB_TV/subsets, IterationsRegul, tol);  % (more memory efficent)
+                    end
+                else
+                    % 3D regularization
+                    X = SplitBregman_TV(single(X), lambdaSB_TV/subsets, IterationsRegul, tol);  % (more memory efficent)
+                end
+            end
+            if (lambdaHO > 0)
+                % Higher Order (LLT) regularization
+                X2 = zeros(N,N,SlicesZ,'single');
+                if ((strcmp('2D', Dimension) == 1))
+                    % 2D regularization
+                    for kkk = 1:SlicesZ
+                        X2(:,:,kkk) = LLT_model(single(X(:,:,kkk)), lambdaHO/subsets, tauHO/subsets, iterHO, 2.0e-05, 0);
+                    end
+                else
+                    % 3D regularization
+                    X2 = LLT_model(single(X), lambdaHO/subsets, tauHO/subsets, iterHO, 2.0e-05, 0);
+                end
+                X = 0.5.*(X + X2); % averaged combination of two solutions
+            end
+            if (lambdaPB > 0)
+                % Patch-Based regularization (can be slow on CPU)
+                if ((strcmp('2D', Dimension) == 1))
+                    % 2D regularization
+                    for kkk = 1:SlicesZ
+                        X(:,:,kkk) = PatchBased_Regul(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB/subsets);
+                    end
+                else
+                    X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB/subsets);
+                end
+            end
+            if (LambdaTGV > 0)
+                % Total Generalized variation (currently only 2D)
+                lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
+                lamTGV2 = 0.5; % second-order term
+                for kkk = 1:SlicesZ
+                X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/subsets, lamTGV1, lamTGV2, IterationsRegul);
+                end
+            end           
+            
+            if (lambdaR_L1 > 0)
+                r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
+            end
+            
+            t = (1 + sqrt(1 + 4*t^2))/2; % updating t
+            X_t = X + ((t_old-1)/t).*(X - X_old); % updating X
+            
+            if (lambdaR_L1 > 0)
+                r_x = r + ((t_old-1)/t).*(r - r_old); % updating r
+            end
+            
+            counterInd = counterInd + numProjSub;
+        end
+        
+        if (show == 1)
+            figure(10); imshow(X(:,:,slice), [0 maxvalplot]);
+            if (lambdaR_L1 > 0)
+                figure(11); plot(r); title('Rings offset vector')
+            end
+            pause(0.01);
+        end
+        
+        if (strcmp(X_ideal, 'none' ) == 0)
+            Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
+            fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
+        else
+            fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
         end
-        pause(0.01);
-    end
-    if (strcmp(X_ideal, 'none' ) == 0)
-        Resid_error(i) = RMSE(X(ROI), X_ideal(ROI));
-        fprintf('%s %i %s %s %.4f  %s %s %f \n', 'Iteration Number:', i, '|', 'Error RMSE:', Resid_error(i), '|', 'Objective:', objective(i));
-    else
-        fprintf('%s %i  %s %s %f \n', 'Iteration Number:', i, '|', 'Objective:', objective(i));
     end
 end
+
 output.Resid_error = Resid_error;
 output.objective = objective;
 output.L_const = L_const;
-output.sino = sino;
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
 end
diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c
index d4bb787..6593d8e 100644
--- a/main_func/regularizers_CPU/TGV_PD.c
+++ b/main_func/regularizers_CPU/TGV_PD.c
@@ -37,9 +37,9 @@ limitations under the License.
  * figure;
  * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
  * u0 = Im + .03*randn(size(Im)); % adding noise
- * tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
+ * tic; u = TGV_PD(single(u0), 0.02, 1.3, 1, 550); toc;
  *
- * to compile with OMP support: mex TGV_PD.c  TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+ * to compile with OMP support: mex TGV_PD.c TGV_PD_core.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
  * References:
  * K. Bredies "Total Generalized Variation"
  *
@@ -92,7 +92,7 @@ void mexFunction(
         
         
          /*printf("%i \n", i);*/
-    L2 = 12.0; /*Lipshitz constant*/
+    L2 = 12.0f; /*Lipshitz constant*/
     tau = 1.0/pow(L2,0.5);
     sigma = 1.0/pow(L2,0.5);
     
-- 
cgit v1.2.3


From 69c2afc4670966c7e12420be7afd55ea69dfc8e8 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Thu, 24 Aug 2017 12:46:12 +0100
Subject: a minor big with TGV being constatly swtiched one removed

---
 main_func/FISTA_REC.m | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index 381fa34..a6e0ae5 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -98,7 +98,7 @@ else
         for i = 1:niter
             [id,x1] = astra_create_backprojection3d_cuda(sqweight.*y, proj_geomT, vol_geomT);
             s = norm(x1(:));
-            x1 = x1/s;
+            x1 = x1./s;
             [sino_id, y] = astra_create_sino3d_cuda(x1, proj_geomT, vol_geomT);
             y = sqweight.*y;
             astra_mex_data3d('delete', sino_id);
@@ -196,7 +196,7 @@ end
 if (isfield(params,'Regul_LambdaTGV'))
     LambdaTGV = params.Regul_LambdaTGV;
 else
-    LambdaTGV = 0.01;
+    LambdaTGV = 0;
 end
 if (isfield(params,'Ring_LambdaR_L1'))
     lambdaR_L1 = params.Ring_LambdaR_L1;
@@ -369,6 +369,7 @@ if (subsets == 0)
                 X2 = LLT_model(single(X), lambdaHO, tauHO, iterHO, 3.0e-05, 0);
             end
             X = 0.5.*(X + X2); % averaged combination of two solutions
+            
         end
         if (lambdaPB > 0)
             % Patch-Based regularization (can be slow on CPU)
@@ -384,7 +385,7 @@ if (subsets == 0)
         if (LambdaTGV > 0)
                 % Total Generalized variation (currently only 2D)
                 lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
-                lamTGV2 = 0.5; % second-order term
+                lamTGV2 = 0.8; % second-order term
                 for kkk = 1:SlicesZ
                 X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul);
                 end
-- 
cgit v1.2.3


From ac2d90b52b15127eadbbf0d2f300d9da31f755c7 Mon Sep 17 00:00:00 2001
From: algol <algol@algol-UX305FA>
Date: Thu, 7 Sep 2017 14:59:36 +0100
Subject: parallel3D updated for big data in Matlab

---
 main_func/FISTA_REC.m | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index a6e0ae5..8dd569f 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -305,7 +305,17 @@ if (subsets == 0)
         t_old = t;
         r_old = r;
         
+        % if the geometry is parallel use slice-by-slice projection-backprojection routine 
+        if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
+			sino_updt = zeros(size(sino),'single');
+			for kkk = 1:SlicesZ			
+			  [sino_id, sino_updt(:,kkk,:)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT);
+			  astra_mex_data3d('delete', sino_id);
+			end
+		else
+		% for divergent 3D geometry (for Matlab watch the GPU memory overflow)
         [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
+        end
         
         if (lambdaR_L1 > 0)
             % the ring removal part (Group-Huber fidelity)
@@ -324,8 +334,16 @@ if (subsets == 0)
         
         objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
         
+        % if the geometry is parallel use slice-by-slice projection-backprojection routine 
+        if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
+        x_temp = zeros(size(X),'single');
+			for kkk = 1:SlicesZ	        
+			[id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,kkk,:)), proj_geomT, vol_geomT);
+			astra_mex_data3d('delete', id);
+			end
+        else
         [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
-        
+        end
         X = X_t - (1/L_const).*x_temp;
         astra_mex_data3d('delete', sino_id);
         astra_mex_data3d('delete', id);
-- 
cgit v1.2.3


From 078b9e2db2e25d663a1140cc71ee4d16c36cc161 Mon Sep 17 00:00:00 2001
From: "Jakob Jorgensen, algol@harwell" <jakj@dtu.dk>
Date: Thu, 7 Sep 2017 15:09:50 +0100
Subject: bugsf

---
 main_func/FISTA_REC.m | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index 8dd569f..00e59ab 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -104,7 +104,7 @@ else
             astra_mex_data3d('delete', sino_id);
             astra_mex_data3d('delete', id);
         end
-        clear proj_geomT vol_geomT
+        %clear proj_geomT vol_geomT
     else
         % divergen beam geometry
         fprintf('%s \n', 'Calculating Lipshitz constant for divergen beam geometry... will take some time!');
@@ -309,7 +309,7 @@ if (subsets == 0)
         if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
 			sino_updt = zeros(size(sino),'single');
 			for kkk = 1:SlicesZ			
-			  [sino_id, sino_updt(:,kkk,:)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT);
+			  [sino_id, sino_updt(:,:,kkk)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT);
 			  astra_mex_data3d('delete', sino_id);
 			end
 		else
@@ -338,7 +338,7 @@ if (subsets == 0)
         if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
         x_temp = zeros(size(X),'single');
 			for kkk = 1:SlicesZ	        
-			[id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,kkk,:)), proj_geomT, vol_geomT);
+			[id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,:,kkk)), proj_geomT, vol_geomT);
 			astra_mex_data3d('delete', id);
 			end
         else
-- 
cgit v1.2.3


From 62ab6cd46c3f1c189328c8d41899db7444c7ac29 Mon Sep 17 00:00:00 2001
From: Daniil Kazantsev <dkazanc@hotmail.com>
Date: Mon, 11 Sep 2017 09:36:13 +0100
Subject: 2 new GPU regularizers, FGP objective fixed, FISTA_REC updated

---
 main_func/FISTA_REC.m                              | 138 ++++++++---
 main_func/regularizers_CPU/FGP_TV.c                |  47 +---
 main_func/regularizers_CPU/FGP_TV_core.c           |  49 ++++
 main_func/regularizers_CPU/FGP_TV_core.h           |   2 +
 .../Diffus_HO/Diff4thHajiaboli_GPU.cpp             | 114 +++++++++
 .../Diffus_HO/Diff4th_GPU_kernel.cu                | 270 +++++++++++++++++++++
 .../Diffus_HO/Diff4th_GPU_kernel.h                 |   6 +
 main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp    | 171 +++++++++++++
 .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu    | 239 ++++++++++++++++++
 .../regularizers_GPU/NL_Regul/NLM_GPU_kernel.h     |   6 +
 10 files changed, 974 insertions(+), 68 deletions(-)
 create mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
 create mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
 create mode 100644 main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
 create mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp
 create mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
 create mode 100644 main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index 00e59ab..edccbe1 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -1,6 +1,15 @@
 function [X,  output] = FISTA_REC(params)
 
 % <<<< FISTA-based reconstruction routine using ASTRA-toolbox >>>>
+% This code solves regularised PWLS problem using FISTA approach.
+% The code contains multiple regularisation penalties as well as it can be
+% accelerated by using ordered-subset version. Various projection
+% geometries supported.
+
+% DISCLAIMER
+% It is recommended to use ASTRA version 1.8 or later in order to avoid
+% crashing due to GPU memory overflow for big datasets
+
 % ___Input___:
 % params.[] file:
 %----------------General Parameters------------------------
@@ -18,11 +27,17 @@ function [X,  output] = FISTA_REC(params)
 %       2 .Regul_Lambda_SBTV (SplitBregman-TV regularization parameter)
 %       3 .Regul_LambdaLLT (Higher order LLT regularization parameter)
 %          3.1 .Regul_tauLLT (time step parameter for LLT (HO) term)
-%       4 .Regul_LambdaPatchBased (Patch-based nonlocal regularization parameter)
+%       4 .Regul_LambdaPatchBased_CPU (Patch-based nonlocal regularization parameter)
 %          4.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
 %          4.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
 %          4.3  .Regul_PB_h (PB penalty function threshold)
-%       5 .Regul_LambdaTGV (Total Generalized variation regularization parameter)
+%       5 .Regul_LambdaPatchBased_GPU (Patch-based nonlocal regularization parameter)
+%          5.1  .Regul_PB_SearchW (ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window))
+%          5.2  .Regul_PB_SimilW (ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window))
+%          5.3  .Regul_PB_h (PB penalty function threshold)
+%       6 .Regul_LambdaDiffHO (Higher-Order Diffusion regularization parameter)
+%          6.1  .Regul_DiffHO_EdgePar (edge-preserving noise related parameter)
+%       7 .Regul_LambdaTGV (Total Generalized variation regularization parameter)
 %       - .Regul_tol (tolerance to terminate regul iterations, default 1.0e-04)
 %       - .Regul_Iterations (iterations for the selected penalty, default 25)
 %       - .Regul_Dimension ('2D' or '3D' way to apply regularization, '3D' is the default)
@@ -173,11 +188,16 @@ if (isfield(params,'Regul_tauLLT'))
 else
     tauHO = 0.0001;
 end
-if (isfield(params,'Regul_LambdaPatchBased'))
-    lambdaPB = params.Regul_LambdaPatchBased;
+if (isfield(params,'Regul_LambdaPatchBased_CPU'))
+    lambdaPB = params.Regul_LambdaPatchBased_CPU;
 else
     lambdaPB = 0;
 end
+if (isfield(params,'Regul_LambdaPatchBased_GPU'))
+    lambdaPB_GPU = params.Regul_LambdaPatchBased_GPU;
+else
+    lambdaPB_GPU = 0;
+end
 if (isfield(params,'Regul_PB_SearchW'))
     SearchW = params.Regul_PB_SearchW;
 else
@@ -193,6 +213,16 @@ if (isfield(params,'Regul_PB_h'))
 else
     h_PB = 0.1; % default
 end
+if (isfield(params,'Regul_LambdaDiffHO'))
+    LambdaDiff_HO = params.Regul_LambdaDiffHO;
+else
+    LambdaDiff_HO = 0;
+end
+if (isfield(params,'Regul_DiffHO_EdgePar'))
+    LambdaDiff_HO_EdgePar = params.Regul_DiffHO_EdgePar;
+else
+    LambdaDiff_HO_EdgePar = 0.01;
+end
 if (isfield(params,'Regul_LambdaTGV'))
     LambdaTGV = params.Regul_LambdaTGV;
 else
@@ -305,16 +335,16 @@ if (subsets == 0)
         t_old = t;
         r_old = r;
         
-        % if the geometry is parallel use slice-by-slice projection-backprojection routine 
+        % if the geometry is parallel use slice-by-slice projection-backprojection routine
         if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
-			sino_updt = zeros(size(sino),'single');
-			for kkk = 1:SlicesZ			
-			  [sino_id, sino_updt(:,:,kkk)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT);
-			  astra_mex_data3d('delete', sino_id);
-			end
-		else
-		% for divergent 3D geometry (for Matlab watch the GPU memory overflow)
-        [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
+            sino_updt = zeros(size(sino),'single');
+            for kkk = 1:SlicesZ
+                [sino_id, sino_updt(:,:,kkk)] = astra_create_sino3d_cuda(X_t(:,:,kkk), proj_geomT, vol_geomT);
+                astra_mex_data3d('delete', sino_id);
+            end
+        else
+            % for divergent 3D geometry (watch the GPU memory overflow in earlier ASTRA versions < 1.8)
+            [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
         end
         
         if (lambdaR_L1 > 0)
@@ -332,17 +362,17 @@ if (subsets == 0)
             residual = weights.*(sino_updt - sino);
         end
         
-        objective(i) = (0.5*norm(residual(:))^2)/(Detectors*anglesNumb*SlicesZ); % for the objective function output
+        objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output
         
-        % if the geometry is parallel use slice-by-slice projection-backprojection routine 
+        % if the geometry is parallel use slice-by-slice projection-backprojection routine
         if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
-        x_temp = zeros(size(X),'single');
-			for kkk = 1:SlicesZ	        
-			[id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,:,kkk)), proj_geomT, vol_geomT);
-			astra_mex_data3d('delete', id);
-			end
+            x_temp = zeros(size(X),'single');
+            for kkk = 1:SlicesZ
+                [id, x_temp(:,:,kkk)] = astra_create_backprojection3d_cuda(squeeze(residual(:,:,kkk)), proj_geomT, vol_geomT);
+                astra_mex_data3d('delete', id);
+            end
         else
-        [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
+            [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geom, vol_geom);
         end
         X = X_t - (1/L_const).*x_temp;
         astra_mex_data3d('delete', sino_id);
@@ -360,7 +390,7 @@ if (subsets == 0)
                 % 3D regularization
                 [X, f_val] = FGP_TV(single(X), lambdaFGP_TV, IterationsRegul, tol, 'iso');
             end
-            objective(i) = objective(i) + f_val;
+            objective(i) = (objective(i) + f_val)./(Detectors*anglesNumb*SlicesZ);
         end
         if (lambdaSB_TV > 0)
             % Split Bregman regularization
@@ -390,7 +420,7 @@ if (subsets == 0)
             
         end
         if (lambdaPB > 0)
-            % Patch-Based regularization (can be slow on CPU)
+            % Patch-Based regularization (can be very slow on CPU)
             if ((strcmp('2D', Dimension) == 1))
                 % 2D regularization
                 for kkk = 1:SlicesZ
@@ -400,13 +430,35 @@ if (subsets == 0)
                 X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB);
             end
         end
-        if (LambdaTGV > 0)
-                % Total Generalized variation (currently only 2D)
-                lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
-                lamTGV2 = 0.8; % second-order term
+        if (lambdaPB_GPU > 0)
+            % Patch-Based regularization (GPU CUDA implementation)
+            if ((strcmp('2D', Dimension) == 1))
+                % 2D regularization
                 for kkk = 1:SlicesZ
-                X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul);
+                    X(:,:,kkk) = NLM_GPU(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB_GPU);
                 end
+            else
+                X = NLM_GPU(single(X), SearchW, SimilW, h_PB, lambdaPB_GPU);
+            end
+        end
+        if (LambdaDiff_HO > 0)
+            % Higher-order diffusion penalty (GPU CUDA implementation)
+            if ((strcmp('2D', Dimension) == 1))
+                % 2D regularization
+                for kkk = 1:SlicesZ
+                    X(:,:,kkk) = Diff4thHajiaboli_GPU(single(X(:,:,kkk)), LambdaDiff_HO_EdgePar, LambdaDiff_HO, IterationsRegul);
+                end
+            else
+                X = Diff4thHajiaboli_GPU(X, LambdaDiff_HO_EdgePar, LambdaDiff_HO, IterationsRegul);
+            end
+        end
+        if (LambdaTGV > 0)
+            % Total Generalized variation (currently only 2D)
+            lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
+            lamTGV2 = 0.8; % second-order term
+            for kkk = 1:SlicesZ
+                X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV, lamTGV1, lamTGV2, IterationsRegul);
+            end
         end
         
         if (lambdaR_L1 > 0)
@@ -470,7 +522,7 @@ else
                 
                 % the ring removal part (Group-Huber fidelity)
                 % first 2 iterations do additional work reconstructing whole dataset to ensure
-                % stablility
+                % the stablility
                 if (i < 3)
                     [sino_id2, sino_updt2] = astra_create_sino3d_cuda(X_t, proj_geom, vol_geom);
                     astra_mex_data3d('delete', sino_id2);
@@ -546,7 +598,7 @@ else
                     % 3D regularization
                     X2 = LLT_model(single(X), lambdaHO/subsets, tauHO/subsets, iterHO, 2.0e-05, 0);
                 end
-                X = 0.5.*(X + X2); % averaged combination of two solutions
+                X = 0.5.*(X + X2); % the averaged combination of two solutions
             end
             if (lambdaPB > 0)
                 % Patch-Based regularization (can be slow on CPU)
@@ -559,14 +611,36 @@ else
                     X = PatchBased_Regul(single(X), SearchW, SimilW, h_PB, lambdaPB/subsets);
                 end
             end
+            if (lambdaPB_GPU > 0)
+                % Patch-Based regularization (GPU CUDA implementation)
+                if ((strcmp('2D', Dimension) == 1))
+                    % 2D regularization
+                    for kkk = 1:SlicesZ
+                        X(:,:,kkk) = NLM_GPU(single(X(:,:,kkk)), SearchW, SimilW, h_PB, lambdaPB_GPU);
+                    end
+                else
+                    X = NLM_GPU(single(X), SearchW, SimilW, h_PB, lambdaPB_GPU);
+                end
+            end
+            if (LambdaDiff_HO > 0)
+                % Higher-order diffusion penalty (GPU CUDA implementation)
+                if ((strcmp('2D', Dimension) == 1))
+                    % 2D regularization
+                    for kkk = 1:SlicesZ
+                        X(:,:,kkk) = Diff4thHajiaboli_GPU(single(X(:,:,kkk)), LambdaDiff_HO_EdgePar, LambdaDiff_HO, round(IterationsRegul/subsets));
+                    end
+                else
+                    X = Diff4thHajiaboli_GPU(X, LambdaDiff_HO_EdgePar, LambdaDiff_HO, round(IterationsRegul/subsets));
+                end
+            end
             if (LambdaTGV > 0)
                 % Total Generalized variation (currently only 2D)
                 lamTGV1 = 1.1; % smoothing trade-off parameters, see Pock's paper
                 lamTGV2 = 0.5; % second-order term
                 for kkk = 1:SlicesZ
-                X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/subsets, lamTGV1, lamTGV2, IterationsRegul);
+                    X(:,:,kkk) = TGV_PD(single(X(:,:,kkk)), LambdaTGV/subsets, lamTGV1, lamTGV2, IterationsRegul);
                 end
-            end           
+            end
             
             if (lambdaR_L1 > 0)
                 r =  max(abs(r)-lambdaR_L1, 0).*sign(r); % soft-thresholding operator for ring vector
diff --git a/main_func/regularizers_CPU/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c
index 5d8cfb9..cfe5b9e 100644
--- a/main_func/regularizers_CPU/FGP_TV.c
+++ b/main_func/regularizers_CPU/FGP_TV.c
@@ -54,7 +54,7 @@ void mexFunction(
 {
     int number_of_dims, iter, dimX, dimY, dimZ, ll, j, count, methTV;
     const int  *dim_array;
-    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil, funcval;
+    float *A, *D=NULL, *D_old=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_old=NULL, *P2_old=NULL, *P3_old=NULL, *R1=NULL, *R2=NULL, *R3=NULL, lambda, tk, tkp1, re, re1, re_old, epsil;
     
     number_of_dims = mxGetNumberOfDimensions(prhs[0]);
     dim_array = mxGetDimensions(prhs[0]);
@@ -78,7 +78,6 @@ void mexFunction(
         mxFree(penalty_type);
     }
     /*output function value (last iteration) */
-    funcval = 0.0f;
     plhs[1] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);  
     float *funcvalA = (float *) mxGetData(plhs[1]);
         
@@ -117,7 +116,7 @@ void mexFunction(
             
             /*updating R and t*/
             tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
-            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);
+            Rupd_func2D(P1, P1_old, P2, P2_old, R1, R2, tkp1, tk, dimX, dimY);                
             
             /* calculate norm */
             re = 0.0f; re1 = 0.0f;
@@ -129,23 +128,17 @@ void mexFunction(
             re = sqrt(re)/sqrt(re1);
             if (re < epsil)  count++;
             if (count > 3) {
-                Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
-                funcval = 0.0f; 
-                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                funcvalA[0] = sqrt(funcval);     
+                Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY); 
                 break; }
             
             /* check that the residual norm is decreasing */
             if (ll > 2) {
                 if (re > re_old) {
-                    Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
-                    funcval = 0.0f; 
-                    for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                    funcvalA[0] = sqrt(funcval);                     
+                    Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);                                   
                     break; }}            
             re_old = re;
-            /*printf("%f %i %i \n", re, ll, count); */
-            
+            /*printf("%f %i %i \n", re, ll, count); */                      
+          
             /*storing old values*/
             copyIm(D, D_old, dimX, dimY, dimZ);
             copyIm(P1, P1_old, dimX, dimY, dimZ);
@@ -153,12 +146,7 @@ void mexFunction(
             tk = tkp1;
             
             /* calculating the objective function value */
-            if (ll == (iter-1)) {
-            Obj_func2D(A, D, P1, P2, lambda, dimX, dimY);            
-            funcval = 0.0f; 
-            for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-            funcvalA[0] = sqrt(funcval);            
-            }
+            if (ll == (iter-1)) Obj_func_CALC2D(A, D, funcvalA, lambda, dimX, dimY);            
         }
         printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
     }
@@ -202,21 +190,14 @@ void mexFunction(
             /* stop if the norm residual is less than the tolerance EPS */
             if (re < epsil)  count++;
             if (count > 3) {
-                Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
-                funcval = 0.0f; 
-                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                funcvalA[0] = sqrt(funcval);                  
+                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);                            
                 break;}
             
             /* check that the residual norm is decreasing */
             if (ll > 2) {
                 if (re > re_old) {
-                Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
-                funcval = 0.0f; 
-                for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-                funcvalA[0] = sqrt(funcval);  
-                break; }}
-            
+                Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);
+                }}            
             re_old = re;
             /*printf("%f %i %i \n", re, ll, count); */
             
@@ -227,13 +208,7 @@ void mexFunction(
             copyIm(P3, P3_old, dimX, dimY, dimZ);
             tk = tkp1;
             
-            if (ll == (iter-1)) {
-            Obj_func3D(A, D, P1, P2, P3,lambda, dimX, dimY, dimZ);
-            funcval = 0.0f; 
-            for(j=0; j<dimX*dimY*dimZ; j++) funcval += pow(D[j],2);                           
-            funcvalA[0] = sqrt(funcval);            
-            }
-            
+            if (ll == (iter-1)) Obj_func_CALC3D(A, D, funcvalA, lambda, dimX, dimY, dimZ);            
         }
         printf("FGP-TV iterations stopped at iteration %i with the function value %f \n", ll, funcvalA[0]);
     }
diff --git a/main_func/regularizers_CPU/FGP_TV_core.c b/main_func/regularizers_CPU/FGP_TV_core.c
index a55991c..9cde327 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.c
+++ b/main_func/regularizers_CPU/FGP_TV_core.c
@@ -47,6 +47,30 @@ limitations under the License.
 
 /* 2D-case related Functions */
 /*****************************************************************/
+float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY)
+{   
+    int i,j;
+    float f1, f2, val1, val2;
+    
+    /*data-related term */
+    f1 = 0.0f;
+    for(i=0; i<dimX*dimY; i++) f1 += pow(D[i] - A[i],2);    
+    
+    /*TV-related term */
+    f2 = 0.0f;
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* boundary conditions  */
+            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(i+1)*dimY + (j)] - A[(i)*dimY + (j)];}
+            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(i)*dimY + (j+1)] - A[(i)*dimY + (j)];}    
+            f2 += sqrt(pow(val1,2) + pow(val2,2));
+        }}  
+    
+    /* sum of two terms */
+    funcvalA[0] = 0.5f*f1 + lambda*f2;     
+    return *funcvalA;
+}
+
 float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
 {
 	float val1, val2;
@@ -131,6 +155,31 @@ float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1,
 
 /* 3D-case related Functions */
 /*****************************************************************/
+float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ)
+{   
+    int i,j,k;
+    float f1, f2, val1, val2, val3;
+    
+    /*data-related term */
+    f1 = 0.0f;
+    for(i=0; i<dimX*dimY*dimZ; i++) f1 += pow(D[i] - A[i],2);    
+    
+    /*TV-related term */
+    f2 = 0.0f;
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+            /* boundary conditions  */
+            if (i == dimX-1) {val1 = 0.0f;} else {val1 = A[(dimX*dimY)*k + (i+1)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}
+            if (j == dimY-1) {val2 = 0.0f;} else {val2 = A[(dimX*dimY)*k + (i)*dimY + (j+1)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
+            if (k == dimZ-1) {val3 = 0.0f;} else {val3 = A[(dimX*dimY)*(k+1) + (i)*dimY + (j)] - A[(dimX*dimY)*k + (i)*dimY + (j)];}    
+            f2 += sqrt(pow(val1,2) + pow(val2,2)  + pow(val3,2));
+        }}}     
+    /* sum of two terms */
+    funcvalA[0] = 0.5f*f1 + lambda*f2;     
+    return *funcvalA;
+}
+
 float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
 {
 	float val1, val2, val3;
diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h
index e5328fb..697fd84 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.h
+++ b/main_func/regularizers_CPU/FGP_TV_core.h
@@ -29,8 +29,10 @@ float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dim
 float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
 float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
 float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int dimX, int dimY);
+float Obj_func_CALC2D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY);
 
 float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
 float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
 float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
 float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
+float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ);
diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp b/main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
new file mode 100644
index 0000000..5a8c7c0
--- /dev/null
+++ b/main_func/regularizers_GPU/Diffus_HO/Diff4thHajiaboli_GPU.cpp
@@ -0,0 +1,114 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include <iostream>
+#include "Diff4th_GPU_kernel.h"
+
+/*
+ * 2D and 3D CUDA implementation of the 4th order PDE denoising model by Hajiaboli
+ *
+ * Reference :
+ * "An anisotropic fourth-order diffusion filter for image noise removal" by M. Hajiaboli
+ * 
+ * Example
+ * figure;
+ * Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+ * u0 = Im + .05*randn(size(Im)); % adding noise
+ * u = Diff4thHajiaboli_GPU(single(u0), 0.02, 150);
+ * subplot (1,2,1); imshow(u0,[ ]); title('Noisy Image')
+ * subplot (1,2,2); imshow(u,[ ]); title('Denoised Image')
+ *
+ *
+ * Linux/Matlab compilation:
+ * compile in terminal: nvcc -Xcompiler -fPIC -shared -o Diff4th_GPU_kernel.o Diff4th_GPU_kernel.cu
+ * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart Diff4thHajiaboli_GPU.cpp Diff4th_GPU_kernel.o
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int numdims, dimZ, size;
+    float *A, *B, *A_L, *B_L;
+    const int *dims;    
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);       
+    
+    float sigma = (float)mxGetScalar(prhs[1]); /* edge-preserving parameter */
+    float lambda = (float)mxGetScalar(prhs[2]); /* regularization parameter */
+    int iter = (int)mxGetScalar(prhs[3]); /* iterations number */
+    
+    if (numdims == 2)  {
+        
+        int N, M, Z, i, j;
+        Z = 0; // for the 2D case
+        float tau = 0.01; // time step is sufficiently small for an explicit methods
+        
+        /*Input data*/
+        A = (float*)mxGetData(prhs[0]);        
+        N = dims[0] + 2;
+        M = dims[1] + 2;
+        A_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        B_L = (float*)mxGetData(mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        
+        /*Output data*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(dims[0], dims[1], mxSINGLE_CLASS, mxREAL));        
+        
+        // copy A to the bigger A_L with boundaries
+        #pragma omp parallel for shared(A_L, A) private(i,j)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))  A_L[i*M+j] = A[(i-1)*(dims[1])+(j-1)];
+            }}
+        
+        // Running CUDA code here
+        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
+        
+        // copy the processed B_L to a smaller B
+        #pragma omp parallel for shared(B_L, B) private(i,j)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)))   B[(i-1)*(dims[1])+(j-1)] = B_L[i*M+j];
+            }}
+    }
+    if (numdims == 3)  {
+        //  3D image denoising / regularization
+        int N, M, Z, i, j, k;
+        float tau = 0.0007; // Time Step is small for an explicit methods
+        A = (float*)mxGetData(prhs[0]);
+        N = dims[0] + 2;
+        M = dims[1] + 2;
+        Z = dims[2] + 2;
+        int N_dims[] = {N, M, Z};
+        A_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        B_L = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        
+        /* output data */
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        
+        // copy A to the bigger A_L with boundaries
+        #pragma omp parallel for shared(A_L, A) private(i,j,k)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                for (k=0; k < Z; k++) {
+                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
+                        A_L[(N*M)*(k)+(i)*M+(j)] = A[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)];
+                    }}}}
+        
+        // Running CUDA kernel here for diffusivity
+        Diff4th_GPU_kernel(A_L, B_L, N, M, Z, (float)sigma, iter, (float)tau, lambda);
+        
+        // copy the processed B_L to a smaller B
+        #pragma omp parallel for shared(B_L, B) private(i,j,k)
+        for (i=0; i < N; i++) {
+            for (j=0; j < M; j++) {
+                for (k=0; k < Z; k++) {
+                    if (((i > 0) && (i < N-1)) &&  ((j > 0) && (j < M-1)) &&  ((k > 0) && (k < Z-1))) {
+                        B[(dims[0]*dims[1])*(k-1)+(i-1)*dims[1]+(j-1)] = B_L[(N*M)*(k)+(i)*M+(j)];
+                    }}}}
+    }
+}
\ No newline at end of file
diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
new file mode 100644
index 0000000..178af00
--- /dev/null
+++ b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.cu
@@ -0,0 +1,270 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "Diff4th_GPU_kernel.h"
+
+#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+#define sizeT (sizeX*sizeY*sizeZ)
+#define epsilon 0.00000001
+
+/////////////////////////////////////////////////
+// 2D Image denosing - Second Step (The second derrivative)
+__global__ void Diff4th2D_derriv(float* B, float* A, float *A0, int N, int M, float sigma, int iter, float tau, float lambda)
+{
+    float gradXXc = 0, gradYYc = 0;
+    int i = blockIdx.x*blockDim.x + threadIdx.x;
+    int j = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    int index = j + i*N;
+    
+    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
+        return;    }
+    
+    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
+    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
+    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
+    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
+    
+    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
+    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
+    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc));
+}
+
+// 2D Image denosing - The First Step
+__global__ void Diff4th2D(float* A, float* B, int N, int M, float sigma, int iter, float tau)
+{
+    float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, sq_sum, xy_2,  V_norm, V_orth, c, c_sq;
+    
+    int i = blockIdx.x*blockDim.x + threadIdx.x;
+    int j = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    int index = j + i*N;
+    
+    V_norm = 0.0f; V_orth = 0.0f;
+    
+    if (((i < 1) || (i > N-2)) || ((j < 1) || (j > M-2))) {
+        return;    }
+    
+    int indexN = (j)+(i-1)*(N); if (A[indexN] == 0) indexN = index;
+    int indexS = (j)+(i+1)*(N); if (A[indexS] == 0) indexS = index;
+    int indexW = (j-1)+(i)*(N); if (A[indexW] == 0) indexW = index;
+    int indexE = (j+1)+(i)*(N); if (A[indexE] == 0) indexE = index;
+    int indexNW = (j-1)+(i-1)*(N); if (A[indexNW] == 0) indexNW = index;
+    int indexNE = (j+1)+(i-1)*(N); if (A[indexNE] == 0) indexNE = index;
+    int indexWS = (j-1)+(i+1)*(N); if (A[indexWS] == 0) indexWS = index;
+    int indexES = (j+1)+(i+1)*(N); if (A[indexES] == 0) indexES = index;
+    
+    gradX = 0.5f*(A[indexN]-A[indexS]);
+    gradX_sq = gradX*gradX;
+    gradXX = A[indexN] + A[indexS] - 2*A[index];
+    
+    gradY = 0.5f*(A[indexW]-A[indexE]);
+    gradY_sq = gradY*gradY;
+    gradYY = A[indexW] + A[indexE] - 2*A[index];
+    
+    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
+    xy_2 = 2.0f*gradX*gradY*gradXY;
+    sq_sum =  gradX_sq + gradY_sq;
+    
+    if (sq_sum <= epsilon) {
+        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/epsilon;
+        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/epsilon; }
+    else  {
+        V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/sq_sum;
+        V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/sq_sum;  }
+    
+    c = 1.0f/(1.0f + sq_sum/sigma);
+    c_sq = c*c;
+    B[index] =  c_sq*V_norm + c*V_orth;
+}
+
+/////////////////////////////////////////////////
+// 3D data parocerssing
+__global__ void Diff4th3D_derriv(float *B, float *A, float *A0, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
+{
+    float gradXXc = 0, gradYYc = 0, gradZZc = 0;
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = xIndex + M*yIndex + N*M*zIndex;
+    
+    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
+        return;    }
+    
+    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
+    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
+    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
+    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
+    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
+    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
+    
+    gradXXc = B[indexN] + B[indexS] - 2*B[index] ;
+    gradYYc = B[indexW] + B[indexE] - 2*B[index] ;
+    gradZZc = B[indexU] + B[indexD] - 2*B[index] ;   
+        
+    A[index]  = A[index] - tau*((A[index] - A0[index]) + lambda*(gradXXc + gradYYc + gradZZc));    
+}
+
+__global__ void Diff4th3D(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau)
+{
+    float gradX, gradX_sq, gradY, gradY_sq, gradZ, gradZ_sq, gradXX, gradYY, gradZZ, gradXY, gradXZ, gradYZ, sq_sum, xy_2, xyz_1, xyz_2, V_norm, V_orth, c, c_sq;
+    
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    int zIndex = blockDim.z * blockIdx.z + threadIdx.z;
+    
+    int index = xIndex + M*yIndex + N*M*zIndex;
+    V_norm = 0.0f; V_orth = 0.0f;
+    
+    if (((xIndex < 1) || (xIndex > N-2)) || ((yIndex < 1) || (yIndex > M-2)) || ((zIndex < 1) || (zIndex > Z-2))) {
+        return;    }
+    
+    B[index] = 0;
+    
+    int indexN = (xIndex-1) + M*yIndex + N*M*zIndex; if (A[indexN] == 0) indexN = index;
+    int indexS = (xIndex+1) + M*yIndex + N*M*zIndex; if (A[indexS] == 0) indexS = index;
+    int indexW = xIndex + M*(yIndex-1) + N*M*zIndex; if (A[indexW] == 0) indexW = index;
+    int indexE = xIndex + M*(yIndex+1) + N*M*zIndex; if (A[indexE] == 0) indexE = index;
+    int indexU = xIndex + M*yIndex + N*M*(zIndex-1); if (A[indexU] == 0) indexU = index;
+    int indexD = xIndex + M*yIndex + N*M*(zIndex+1); if (A[indexD] == 0) indexD = index;
+    
+    int indexNW = (xIndex-1) + M*(yIndex-1) + N*M*zIndex;  if (A[indexNW] == 0) indexNW = index;
+    int indexNE = (xIndex-1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexNE] == 0) indexNE = index;
+    int indexWS =  (xIndex+1) + M*(yIndex-1) + N*M*zIndex; if (A[indexWS] == 0) indexWS = index;
+    int indexES = (xIndex+1) + M*(yIndex+1) + N*M*zIndex;  if (A[indexES] == 0) indexES = index;
+    
+    int indexUW = (xIndex-1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUW] == 0) indexUW = index;
+    int indexUE = (xIndex+1) + M*(yIndex) + N*M*(zIndex-1); if (A[indexUE] == 0) indexUE = index;
+    int indexDW =  (xIndex-1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDW] == 0) indexDW = index;
+    int indexDE = (xIndex+1) + M*(yIndex) + N*M*(zIndex+1); if (A[indexDE] == 0) indexDE = index;
+    
+    int indexUN = (xIndex) + M*(yIndex-1) + N*M*(zIndex-1);  if (A[indexUN] == 0) indexUN = index;
+    int indexUS = (xIndex) + M*(yIndex+1) + N*M*(zIndex-1);  if (A[indexUS] == 0) indexUS = index;
+    int indexDN =  (xIndex) + M*(yIndex-1) + N*M*(zIndex+1); if (A[indexDN] == 0) indexDN = index;
+    int indexDS = (xIndex) + M*(yIndex+1) + N*M*(zIndex+1);  if (A[indexDS] == 0) indexDS = index;
+    
+    gradX = 0.5f*(A[indexN]-A[indexS]);
+    gradX_sq = gradX*gradX;
+    gradXX = A[indexN] + A[indexS] - 2*A[index];
+    
+    gradY = 0.5f*(A[indexW]-A[indexE]);
+    gradY_sq = gradY*gradY;
+    gradYY = A[indexW] + A[indexE] - 2*A[index];
+    
+    gradZ = 0.5f*(A[indexU]-A[indexD]);
+    gradZ_sq = gradZ*gradZ;
+    gradZZ = A[indexU] + A[indexD] - 2*A[index];
+    
+    gradXY = 0.25f*(A[indexNW] - A[indexNE] - A[indexWS] + A[indexES]);
+    gradXZ = 0.25f*(A[indexUW] - A[indexUE] - A[indexDW] + A[indexDE]);
+    gradYZ = 0.25f*(A[indexUN] - A[indexUS] - A[indexDN] + A[indexDS]);
+    
+    xy_2  = 2.0f*gradX*gradY*gradXY;
+    xyz_1 = 2.0f*gradX*gradZ*gradXZ;
+    xyz_2 = 2.0f*gradY*gradZ*gradYZ;
+    
+    sq_sum =  gradX_sq + gradY_sq + gradZ_sq;
+    
+    if (sq_sum <= epsilon) {
+        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/epsilon;
+        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/epsilon;  }
+    else  {
+        V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/sq_sum;
+        V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/sq_sum;  }
+    
+    c = 1;
+    if ((1.0f + sq_sum/sigma) != 0.0f)  {c = 1.0f/(1.0f + sq_sum/sigma);}
+    
+    c_sq = c*c;
+    B[index] =  c_sq*V_norm + c*V_orth;
+}
+
+/******************************************************/
+/********* HOST FUNCTION*************/
+extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda)
+{
+     int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount == 0) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }    
+    
+      int BLKXSIZE, BLKYSIZE,BLKZSIZE;
+      float *Ad, *Bd, *Cd;
+      sigma = sigma*sigma;
+    
+    if (Z == 0){
+        // 4th order diffusion for 2D case     
+        BLKXSIZE = 8;
+        BLKYSIZE = 16;
+        
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+        
+        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*sizeof(float)));
+        
+        checkCudaErrors(cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(Bd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(Cd,A,N*M*sizeof(float),cudaMemcpyHostToDevice));
+        
+        int n = 1;
+        while (n <= iter) {
+            Diff4th2D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, sigma, iter, tau);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            Diff4th2D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, sigma, iter, tau, lambda);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            n++;
+        }
+        checkCudaErrors(cudaMemcpy(B,Bd,N*M*sizeof(float),cudaMemcpyDeviceToHost));
+        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
+    }
+    
+    if (Z != 0){
+        // 4th order diffusion for 3D case
+        BLKXSIZE = 8;
+        BLKYSIZE = 8;
+        BLKZSIZE = 8;        
+        
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
+        
+        checkCudaErrors(cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)));
+        checkCudaErrors(cudaMalloc((void**)&Cd,N*M*Z*sizeof(float)));
+        
+        checkCudaErrors(cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors(cudaMemcpy(Bd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));        
+        checkCudaErrors(cudaMemcpy(Cd,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+        
+        int n = 1;
+        while (n <= iter) {
+            Diff4th3D<<<dimGrid,dimBlock>>>(Bd, Cd, N, M, Z, sigma, iter, tau);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            Diff4th3D_derriv<<<dimGrid,dimBlock>>>(Cd, Bd, Ad, N, M, Z, sigma, iter, tau, lambda);
+            cudaDeviceSynchronize();
+            checkCudaErrors( cudaPeekAtLastError() );
+            n++;
+        }
+        checkCudaErrors(cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
+        cudaFree(Ad); cudaFree(Bd); cudaFree(Cd);
+    }
+}
\ No newline at end of file
diff --git a/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
new file mode 100644
index 0000000..cfbb45a
--- /dev/null
+++ b/main_func/regularizers_GPU/Diffus_HO/Diff4th_GPU_kernel.h
@@ -0,0 +1,6 @@
+#ifndef __DIFF_HO_H_
+#define __DIFF_HO_H_
+
+extern "C" void Diff4th_GPU_kernel(float* A, float* B, int N, int M, int Z, float sigma, int iter, float tau, float lambda);
+
+#endif 
diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp b/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp
new file mode 100644
index 0000000..ff0cc90
--- /dev/null
+++ b/main_func/regularizers_GPU/NL_Regul/NLM_GPU.cpp
@@ -0,0 +1,171 @@
+#include "mex.h"
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include <iostream>
+#include "NLM_GPU_kernel.h"
+
+/* CUDA implementation of the patch-based (PB) regularization  for 2D and 3D images/volumes  
+ * This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+ * 
+ * References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+ *             2. Kazantsev D. at. all "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+ *
+ * Input Parameters (mandatory):
+ * 1. Image/volume (2D/3D)
+ * 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
+ * 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
+ * 4. h - parameter for the PB penalty function
+ * 5. lambda - regularization parameter 
+
+ * Output:
+ * 1. regularized (denoised) Image/volume (N x N x N)
+ *
+ * In matlab check what kind of GPU you have with "gpuDevice" command,
+ * then set your ComputeCapability, here I use -arch compute_35
+ *
+ * Quick 2D denoising example in Matlab:   
+   Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+   u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+   ImDen = NLM_GPU(single(u0), 3, 2, 0.15, 1);
+ 
+ * Linux/Matlab compilation:
+ * compile in terminal: nvcc -Xcompiler -fPIC -shared -o NLM_GPU_kernel.o NLM_GPU_kernel.cu
+ * then compile in Matlab: mex -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart NLM_GPU.cpp NLM_GPU_kernel.o
+ *
+ * D. Kazantsev 
+ * 2014-17
+ * Harwell/Manchester UK
+ */
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY,  int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int N, M, Z, i_n, j_n, k_n, numdims, SearchW, SimilW, SearchW_real, padXY, newsizeX, newsizeY, newsizeZ, switchpad_crop, count, SearchW_full, SimilW_full;
+    const int  *dims;
+    float *A, *B=NULL, *Ap=NULL, *Bp=NULL, *Eucl_Vec, h, h2, lambda, val, denh2;
+    
+    numdims = mxGetNumberOfDimensions(prhs[0]);
+    dims = mxGetDimensions(prhs[0]);
+    
+    N = dims[0];
+    M = dims[1];
+    Z = dims[2];
+    
+    if ((numdims < 2) || (numdims > 3)) {mexErrMsgTxt("The input should be 2D image or 3D volume");}
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input in single precision is required"); }
+    
+    if(nrhs != 5) mexErrMsgTxt("Five inputs reqired: Image(2D,3D), SearchW, SimilW, Threshold, Regularization parameter");
+    
+    /*Handling inputs*/
+    A  = (float *) mxGetData(prhs[0]);    /* the image to regularize/filter */
+    SearchW_real  = (int) mxGetScalar(prhs[1]); /* the searching window ratio */
+    SimilW =  (int) mxGetScalar(prhs[2]);  /* the similarity window ratio */
+    h =  (float) mxGetScalar(prhs[3]);  /* parameter for the PB filtering function */
+    lambda = (float) mxGetScalar(prhs[4]);
+    
+    if (h <= 0) mexErrMsgTxt("Parmeter for the PB penalty function should be > 0");
+      
+    SearchW = SearchW_real + 2*SimilW;
+    
+    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    h2 = h*h;
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    newsizeX = N + 2*(padXY); /* the X size of the padded array */
+    newsizeY = M + 2*(padXY); /* the Y size of the padded array */
+    newsizeZ = Z + 2*(padXY); /* the Z size of the padded array */
+    int N_dims[] = {newsizeX, newsizeY, newsizeZ};
+    
+    /******************************2D case ****************************/
+    if (numdims == 2) {
+        /*Handling output*/
+        B = (float*)mxGetData(plhs[0] = mxCreateNumericMatrix(N, M, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetData(mxCreateNumericMatrix(newsizeX, newsizeY, mxSINGLE_CLASS, mxREAL));
+        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
+        
+        /*Gaussian kernel */
+        count = 0;
+        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+                val = (float)(i_n*i_n + j_n*j_n)/(2*SimilW*SimilW);
+                Eucl_Vec[count] = exp(-val);
+                count = count + 1;
+            }} /*main neighb loop */
+        
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, 0, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, 0, newsizeY, newsizeX, 0, padXY, switchpad_crop);
+    }
+    else
+    {
+        /******************************3D case ****************************/
+        /*Handling output*/
+        B = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dims, mxSINGLE_CLASS, mxREAL));
+        /*allocating memory for the padded arrays */
+        Ap = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Bp = (float*)mxGetPr(mxCreateNumericArray(3, N_dims, mxSINGLE_CLASS, mxREAL));
+        Eucl_Vec = (float*)mxGetData(mxCreateNumericMatrix(SimilW_full*SimilW_full*SimilW_full, 1, mxSINGLE_CLASS, mxREAL));
+        
+        /*Gaussian kernel */
+        count = 0;
+        for(i_n=-SimilW; i_n<=SimilW; i_n++) {
+            for(j_n=-SimilW; j_n<=SimilW; j_n++) {
+                for(k_n=-SimilW; k_n<=SimilW; k_n++) {
+                    val = (float)(i_n*i_n + j_n*j_n + k_n*k_n)/(2*SimilW*SimilW*SimilW);
+                    Eucl_Vec[count] = exp(-val);
+                    count = count + 1;
+                }}} /*main neighb loop */
+        /**************************************************************************/
+        /*Perform padding of image A to the size of [newsizeX * newsizeY * newsizeZ] */
+        switchpad_crop = 0; /*padding*/
+        pad_crop(A, Ap, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+        
+        /* Do PB regularization with the padded array  */
+        NLM_GPU_kernel(Ap, Bp, Eucl_Vec, newsizeY, newsizeX, newsizeZ, numdims, SearchW, SimilW, SearchW_real, (float)h2, (float)lambda);
+        
+        switchpad_crop = 1; /*cropping*/
+        pad_crop(Bp, B, M, N, Z, newsizeY, newsizeX, newsizeZ, padXY, switchpad_crop);
+    } /*end else ndims*/
+}
+
+float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop)
+{
+    /* padding-cropping function */
+    int i,j,k;    
+    if (NewSizeZ > 1) {    
+           for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+              for (k=0; k < NewSizeZ; k++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY)) &&  ((k >= padXY) && (k < NewSizeZ-padXY))) {
+                    if (switchpad_crop == 0)  Ap[NewSizeX*NewSizeY*k + i*NewSizeY+j] = A[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[OldSizeX*OldSizeY*(k - padXY) + (i-padXY)*(OldSizeY)+(j-padXY)] = A[NewSizeX*NewSizeY*k + i*NewSizeY+j];
+                }
+            }}}   
+    }
+    else {
+        for (i=0; i < NewSizeX; i++) {
+            for (j=0; j < NewSizeY; j++) {
+                if (((i >= padXY) && (i < NewSizeX-padXY)) &&  ((j >= padXY) && (j < NewSizeY-padXY))) {
+                    if (switchpad_crop == 0)  Ap[i*NewSizeY+j] = A[(i-padXY)*(OldSizeY)+(j-padXY)];
+                    else  Ap[(i-padXY)*(OldSizeY)+(j-padXY)] = A[i*NewSizeY+j];
+                }
+            }}
+    }
+    return *Ap;
+}
\ No newline at end of file
diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
new file mode 100644
index 0000000..17da3a8
--- /dev/null
+++ b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.cu
@@ -0,0 +1,239 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include "NLM_GPU_kernel.h"
+
+#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+                file, line, (int)err, cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+extern __shared__ float sharedmem[];
+
+// run PB den kernel here
+__global__ void NLM_kernel(float *Ad, float* Bd, float *Eucl_Vec_d, int N, int M, int Z, int SearchW, int SimilW, int SearchW_real, int SearchW_full, int SimilW_full, int padXY, float h2, float lambda,  dim3 imagedim, dim3 griddim, dim3 kerneldim, dim3 sharedmemdim, int nUpdatePerThread, float neighborsize)
+{
+    
+    int  i1, j1, k1, i2, j2, k2, i3, j3, k3, i_l, j_l, k_l, count;
+    float value, Weight_norm, normsum, Weight;
+    
+    int bidx = blockIdx.x;
+    int bidy = blockIdx.y%griddim.y;
+    int bidz = (int)((blockIdx.y)/griddim.y);
+    
+    // global index for block endpoint
+    int beidx = __mul24(bidx,blockDim.x);
+    int beidy = __mul24(bidy,blockDim.y);
+    int beidz = __mul24(bidz,blockDim.z);
+    
+    int tid = __mul24(threadIdx.z,__mul24(blockDim.x,blockDim.y)) +
+            __mul24(threadIdx.y,blockDim.x) + threadIdx.x;
+    
+    #ifdef __DEVICE_EMULATION__
+            printf("tid : %d", tid);
+    #endif
+            
+    // update shared memory
+    int nthreads = blockDim.x*blockDim.y*blockDim.z;
+    int sharedMemSize = sharedmemdim.x * sharedmemdim.y * sharedmemdim.z;
+    for(int i=0; i<nUpdatePerThread; i++)
+    {
+        int sid = tid + i*nthreads; // index in shared memory
+        if (sid < sharedMemSize)
+        {
+            // global x/y/z index in volume
+            int gidx, gidy, gidz;
+            int sidx, sidy, sidz, tid;
+            
+            sidz = sid / (sharedmemdim.x*sharedmemdim.y);
+            tid  = sid - sidz*(sharedmemdim.x*sharedmemdim.y);
+            sidy = tid / (sharedmemdim.x);
+            sidx = tid - sidy*(sharedmemdim.x);
+            
+            gidx = (int)sidx - (int)kerneldim.x + (int)beidx;
+            gidy = (int)sidy - (int)kerneldim.y + (int)beidy;
+            gidz = (int)sidz - (int)kerneldim.z + (int)beidz;
+            
+            // Neumann boundary condition
+            int cx = (int) min(max(0,gidx),imagedim.x-1);
+            int cy = (int) min(max(0,gidy),imagedim.y-1);
+            int cz = (int) min(max(0,gidz),imagedim.z-1);
+            
+            int gid = cz*imagedim.x*imagedim.y + cy*imagedim.x + cx;
+            
+            sharedmem[sid] = Ad[gid];
+        }
+    }
+    __syncthreads();
+    
+    // global index of the current voxel in the input volume
+    int idx = beidx + threadIdx.x;
+    int idy = beidy + threadIdx.y;
+    int idz = beidz + threadIdx.z;
+    
+    if (Z == 1) {
+        /* 2D case */
+        /*checking boundaries to be within the image and avoid padded spaces */
+        if( idx >= padXY && idx < (imagedim.x - padXY) &&
+                idy >= padXY && idy < (imagedim.y - padXY))
+        {
+            int i_centr = threadIdx.x + (SearchW); /*indices of the centrilized (main) pixel */
+            int j_centr = threadIdx.y + (SearchW); /*indices of the centrilized (main) pixel */
+            
+            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M)) {
+                
+                Weight_norm = 0; value = 0.0;
+                /* Massive Search window loop */
+                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
+                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
+                        /* if inside the searching window */
+                        count = 0; normsum = 0.0;
+                        for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                            for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                i2 = i1+i_l; j2 = j1+j_l;
+                                i3 = i_centr+i_l; j3 = j_centr+j_l;  /*coordinates of the inner patch loop */                                
+                                if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M)) {
+                                       if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M)) {
+                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(j3)*sharedmemdim.x+(i3)] - sharedmem[j2*sharedmemdim.x+i2]), 2);
+                                            }}
+                                        count++;
+                                }}
+                                if (normsum != 0) Weight = (expf(-normsum/h2));
+                                else Weight = 0.0;
+                                Weight_norm += Weight;
+                                value += sharedmem[j1*sharedmemdim.x+i1]*Weight;
+                            }}      
+                                
+                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
+                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
+            }
+        }      /*boundary conditions end*/
+    }
+    else {
+        /*3D case*/
+        /*checking boundaries to be within the image and avoid padded spaces */
+        if( idx >= padXY && idx < (imagedim.x - padXY) &&
+                idy >= padXY && idy < (imagedim.y - padXY) &&
+                idz >= padXY && idz < (imagedim.z - padXY) )
+        {
+            int i_centr = threadIdx.x + SearchW; /*indices of the centrilized (main) pixel */
+            int j_centr = threadIdx.y + SearchW; /*indices of the centrilized (main) pixel */
+            int k_centr = threadIdx.z + SearchW; /*indices of the centrilized (main) pixel */
+            
+            if ((i_centr > 0) && (i_centr < N) && (j_centr > 0) && (j_centr < M) && (k_centr > 0) && (k_centr < Z)) {
+                
+                Weight_norm = 0; value = 0.0;
+                /* Massive Search window loop */
+                for(i1 = i_centr - SearchW_real ; i1 <= i_centr + SearchW_real; i1++) {
+                    for(j1 = j_centr - SearchW_real ; j1<= j_centr + SearchW_real ; j1++) {
+                        for(k1 = k_centr - SearchW_real ; k1<= k_centr + SearchW_real ; k1++) {
+                            /* if inside the searching window */
+                            count = 0; normsum = 0.0;
+                            for(i_l=-SimilW; i_l<=SimilW; i_l++) {
+                                for(j_l=-SimilW; j_l<=SimilW; j_l++) {
+                                    for(k_l=-SimilW; k_l<=SimilW; k_l++) {
+                                        i2 = i1+i_l; j2 = j1+j_l; k2 = k1+k_l;
+                                        i3 = i_centr+i_l; j3 = j_centr+j_l; k3 = k_centr+k_l;   /*coordinates of the inner patch loop */                              
+                                                    if ((i2 > 0) && (i2 < N) && (j2 > 0) && (j2 < M) && (k2 > 0) && (k2 < Z)) {
+                                                        if ((i3 > 0) && (i3 < N) && (j3 > 0) && (j3 < M) && (k3 > 0) && (k3 < Z)) {
+                                                            normsum += Eucl_Vec_d[count]*pow((sharedmem[(k3)*sharedmemdim.x*sharedmemdim.y + (j3)*sharedmemdim.x+(i3)] - sharedmem[(k2)*sharedmemdim.x*sharedmemdim.y + j2*sharedmemdim.x+i2]), 2);
+                                                        }}
+                                                    count++;
+                                                }}}
+                                       if (normsum != 0) Weight = (expf(-normsum/h2));
+                                       else Weight = 0.0;
+                                       Weight_norm += Weight;
+                                       value += sharedmem[k1*sharedmemdim.x*sharedmemdim.y + j1*sharedmemdim.x+i1]*Weight;                                                                
+                        }}}      /* BIG search window loop end*/
+                
+               
+                if (Weight_norm != 0) Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = value/Weight_norm;
+                else Bd[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx] = Ad[idz*imagedim.x*imagedim.y + idy*imagedim.x + idx];
+            }
+        }      /* boundary conditions end */
+    }
+}
+        
+/////////////////////////////////////////////////
+// HOST FUNCTION
+extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float h2, float lambda)
+{
+    int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount == 0) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }
+    
+//     cudaDeviceReset();
+    
+    int padXY, SearchW_full, SimilW_full,  blockWidth, blockHeight, blockDepth, nBlockX, nBlockY, nBlockZ, kernel_depth;
+    float *Ad, *Bd, *Eucl_Vec_d;
+    
+    if (dimension == 2) {
+        blockWidth  = 16;
+        blockHeight = 16;
+        blockDepth  = 1;
+        Z = 1;
+        kernel_depth = 0;
+    }
+    else {
+        blockWidth  = 8;
+        blockHeight = 8;
+        blockDepth  = 8;
+        kernel_depth = SearchW;
+    }
+    
+    // compute how many blocks are needed
+    nBlockX = ceil((float)N / (float)blockWidth);
+    nBlockY = ceil((float)M / (float)blockHeight);
+    nBlockZ = ceil((float)Z / (float)blockDepth);
+    
+    dim3 dimGrid(nBlockX,nBlockY*nBlockZ);
+    dim3 dimBlock(blockWidth, blockHeight, blockDepth);
+    dim3 imagedim(N,M,Z);
+    dim3 griddim(nBlockX,nBlockY,nBlockZ);
+    
+    dim3 kerneldim(SearchW,SearchW,kernel_depth);
+    dim3 sharedmemdim((SearchW*2)+blockWidth,(SearchW*2)+blockHeight,(kernel_depth*2)+blockDepth);
+    int sharedmemsize = sizeof(float)*sharedmemdim.x*sharedmemdim.y*sharedmemdim.z;
+    int updateperthread = ceil((float)(sharedmemdim.x*sharedmemdim.y*sharedmemdim.z)/(float)(blockWidth*blockHeight*blockDepth));
+    float neighborsize = (2*SearchW+1)*(2*SearchW+1)*(2*kernel_depth+1);
+    
+    padXY = SearchW + 2*SimilW; /* padding sizes */
+    
+    SearchW_full = 2*SearchW + 1; /* the full searching window  size */
+    SimilW_full = 2*SimilW + 1;   /* the full similarity window  size */
+    
+    /*allocate space for images on device*/
+    checkCudaErrors( cudaMalloc((void**)&Ad,N*M*Z*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&Bd,N*M*Z*sizeof(float)) );
+    /*allocate space for vectors on device*/
+    if (dimension == 2) {
+        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*sizeof(float)) );
+        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+    }
+    else {
+        checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d,SimilW_full*SimilW_full*SimilW_full*sizeof(float)) );
+        checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*SimilW_full*SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+    }
+    
+    /* copy data from the host to device */
+    checkCudaErrors( cudaMemcpy(Ad,A,N*M*Z*sizeof(float),cudaMemcpyHostToDevice) );
+    
+    // Run CUDA kernel here
+    NLM_kernel<<<dimGrid,dimBlock,sharedmemsize>>>(Ad, Bd, Eucl_Vec_d, M, N, Z, SearchW, SimilW, SearchW_real, SearchW_full, SimilW_full, padXY, h2, lambda, imagedim, griddim, kerneldim, sharedmemdim, updateperthread, neighborsize);
+    
+    checkCudaErrors( cudaPeekAtLastError() );
+//     gpuErrchk( cudaDeviceSynchronize() );
+    
+    checkCudaErrors( cudaMemcpy(B,Bd,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost) );
+    cudaFree(Ad); cudaFree(Bd); cudaFree(Eucl_Vec_d);
+}
diff --git a/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
new file mode 100644
index 0000000..bc9d4a3
--- /dev/null
+++ b/main_func/regularizers_GPU/NL_Regul/NLM_GPU_kernel.h
@@ -0,0 +1,6 @@
+#ifndef __NLMREG_KERNELS_H_
+#define __NLMREG_KERNELS_H_
+
+extern "C" void NLM_GPU_kernel(float *A, float* B, float *Eucl_Vec, int N, int M, int Z, int dimension, int SearchW, int SimilW, int SearchW_real, float denh2, float lambda);
+
+#endif 
-- 
cgit v1.2.3


From 6fb8f5d188ed31d7a7077cba8ab7aea17b25b8bf Mon Sep 17 00:00:00 2001
From: algol <dkazanc@hotmail.com>
Date: Mon, 11 Sep 2017 12:23:44 +0100
Subject: StudentT fidelity added, can be skipped during pythonising

---
 main_func/FISTA_REC.m | 56 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 9 deletions(-)

(limited to 'main_func')

diff --git a/main_func/FISTA_REC.m b/main_func/FISTA_REC.m
index edccbe1..6987dca 100644
--- a/main_func/FISTA_REC.m
+++ b/main_func/FISTA_REC.m
@@ -19,7 +19,8 @@ function [X,  output] = FISTA_REC(params)
 %       - .iterFISTA (iterations for the main loop, default 40)
 %       - .L_const (Lipschitz constant, default Power method)                                                                                                    )
 %       - .X_ideal (ideal image, if given)
-%       - .weights (statisitcal weights, size of the sinogram)
+%       - .weights (statisitcal weights for the PWLS model, size of the sinogram)
+%       - .fidelity (use 'studentt' fidelity)
 %       - .ROI (Region-of-interest, only if X_ideal is given)
 %       - .initialize (a 'warm start' using SIRT method from ASTRA)
 %----------------Regularization choices------------------------
@@ -92,6 +93,15 @@ if (isfield(params,'weights'))
 else
     weights = ones(size(sino));
 end
+if (isfield(params,'fidelity'))
+	studentt = 0;
+    if (strcmp(params.fidelity,'studentt') == 1)
+    studentt = 1;
+    lambdaR_L1 = 0;
+    end    
+else
+    studentt = 0;    
+end
 if (isfield(params,'L_const'))
     L_const = params.L_const;
 else
@@ -357,12 +367,25 @@ if (subsets == 0)
                 vec = squeeze(vec(:,1,:));
             end
             r = r_x - (1./L_const).*vec;
-        else
-            % no ring removal
+            objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output
+        else            
+			if (studentt == 1)
+				% artifacts removal with Students t penalty
+				residual = weights.*(sino_updt - sino);
+				for kkk = 1:SlicesZ
+				res_vec = reshape(residual(:,:,kkk), Detectors*anglesNumb, 1); % 1D vectorized sinogram
+                %s = 100;
+                %gr = (2)*res_vec./(s*2 + conj(res_vec).*res_vec);
+                [ff, gr] = studentst(res_vec, 1);
+                residual(:,:,kkk) = reshape(gr, Detectors, anglesNumb);
+				end            
+				objective(i) = ff; % for the objective function output
+            else
+            % no ring removal (LS model)
             residual = weights.*(sino_updt - sino);
-        end
-        
-        objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output
+            objective(i) = (0.5*sum(residual(:).^2)); % for the objective function output
+            end	
+        end       
         
         % if the geometry is parallel use slice-by-slice projection-backprojection routine
         if (strcmp(proj_geom.type,'parallel') || strcmp(proj_geom.type,'parallel3d'))
@@ -549,9 +572,24 @@ else
                 end
                 r = r_x - (1./L_const).*vec;
             else
-                [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom);
-                % no ring removal
-                residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:)));
+                [sino_id, sino_updt] = astra_create_sino3d_cuda(X_t, proj_geomSUB, vol_geom);                
+                
+                if (studentt == 1)
+				% artifacts removal with Students t penalty
+				residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:)));
+				
+				for kkk = 1:SlicesZ
+				res_vec = reshape(residual(:,:,kkk), Detectors*numProjSub, 1); % 1D vectorized sinogram
+                %s = 100;
+                %gr = (2)*res_vec./(s*2 + conj(res_vec).*res_vec);
+                [ff, gr] = studentst(res_vec, 1);
+                residual(:,:,kkk) = reshape(gr, Detectors, numProjSub);
+				end            
+				objective(i) = ff; % for the objective function output
+				else
+				% no ring removal (LS model)
+				residual = squeeze(weights(:,CurrSubIndeces,:)).*(sino_updt - squeeze(sino(:,CurrSubIndeces,:)));				
+				end	               
             end
             
             [id, x_temp] = astra_create_backprojection3d_cuda(residual, proj_geomSUB, vol_geom);
-- 
cgit v1.2.3


From 33da5e128e51e5adc1f6085b4772b55b2cd76e2e Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Wed, 2 Aug 2017 16:51:14 +0100
Subject: added utils

---
 main_func/regularizers_CPU/FGP_TV_core.h          | 30 ++++++++++++++++++++++-
 main_func/regularizers_CPU/LLT_model_core.h       |  3 ++-
 main_func/regularizers_CPU/SplitBregman_TV_core.h | 28 ++++++++++++++++++++-
 main_func/regularizers_CPU/TGV_PD_core.h          |  3 ++-
 4 files changed, 60 insertions(+), 4 deletions(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h
index 697fd84..d256c9a 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.h
+++ b/main_func/regularizers_CPU/FGP_TV_core.h
@@ -23,8 +23,36 @@ limitations under the License.
 #include <memory.h>
 #include <stdio.h>
 #include "omp.h"
+#include "utils.h"
 
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+*
+* Input Parameters:
+* 1. Noisy image/volume [REQUIRED]
+* 2. lambda - regularization parameter [REQUIRED]
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon: tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+*
+* Output:
+* [1] Filtered/regularized image
+* [2] last function value
+*
+* Example of image denoising:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .05*randn(size(Im)); % adding noise
+* u = FGP_TV(single(u0), 0.05, 100, 1e-04);
+*
+* to compile with OMP support: mex FGP_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* This function is based on the Matlab's code and paper by
+* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+*
+* D. Kazantsev, 2016-17
+*
+*/
+
+//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
 float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
 float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
 float Proj_func2D(float *P1, float *P2, int methTV, int dimX, int dimY);
diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h
index 10f52fe..560bb9c 100644
--- a/main_func/regularizers_CPU/LLT_model_core.h
+++ b/main_func/regularizers_CPU/LLT_model_core.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <memory.h>
 #include <stdio.h>
 #include "omp.h"
+#include "utils.h"
 
 #define EPS 0.01
 
@@ -36,4 +37,4 @@ float div_upd3D(float *U0, float *U, float *D1, float *D2, float *D3, unsigned s
 float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
 float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
 
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h
index a7aaabb..78aef09 100644
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.h
+++ b/main_func/regularizers_CPU/SplitBregman_TV_core.h
@@ -23,7 +23,33 @@ limitations under the License.
 #include <stdio.h>
 #include "omp.h"
 
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
+#include "utils.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularization parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. eplsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
+* u = SplitBregman_TV(single(u0), 10, 30, 1e-04);
+*
+* to compile with OMP support: mex SplitBregman_TV.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* References:
+* The Split Bregman Method for L1 Regularized Problems, by Tom Goldstein and Stanley Osher.
+* D. Kazantsev, 2016*
+*/
+
+//float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
 float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
 float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
 float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h
index 04ba95c..cbe7ea4 100644
--- a/main_func/regularizers_CPU/TGV_PD_core.h
+++ b/main_func/regularizers_CPU/TGV_PD_core.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include <memory.h>
 #include <stdio.h>
 #include "omp.h"
+#include "utils.h"
 
 /* 2D functions */
 float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
@@ -32,4 +33,4 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, fl
 float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY, int dimZ, float lambda, float tau);
 float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
 float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+//float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-- 
cgit v1.2.3


From 068380a2a2c84b46e486f59c397329f53a2eae1f Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Wed, 2 Aug 2017 16:51:37 +0100
Subject: Added utils.c utils.h

a few regularizers defined the same copyIm function. I moved it into
this new common utils.
---
 main_func/regularizers_CPU/utils.c | 29 +++++++++++++++++++++++++++++
 main_func/regularizers_CPU/utils.h | 27 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 main_func/regularizers_CPU/utils.c
 create mode 100644 main_func/regularizers_CPU/utils.h

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/utils.c b/main_func/regularizers_CPU/utils.c
new file mode 100644
index 0000000..0e83d2c
--- /dev/null
+++ b/main_func/regularizers_CPU/utils.c
@@ -0,0 +1,29 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazanteev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "utils.h"
+
+/* Copy Image */
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
+{
+	int j;
+#pragma omp parallel for shared(A, U) private(j)
+	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
+	return *U;
+}
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/utils.h b/main_func/regularizers_CPU/utils.h
new file mode 100644
index 0000000..c720006
--- /dev/null
+++ b/main_func/regularizers_CPU/utils.h
@@ -0,0 +1,27 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazanteev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+
+float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
-- 
cgit v1.2.3


From 846e09d5c27736a4af80af7f9a3adcda3c6d4bf0 Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Fri, 4 Aug 2017 16:12:27 +0100
Subject: fix typo and add include "matrix.h"

---
 main_func/regularizers_CPU/FGP_TV.c               |  3 ++-
 main_func/regularizers_CPU/LLT_model.c            | 10 ++++++++--
 main_func/regularizers_CPU/SplitBregman_TV.c      |  3 ++-
 main_func/regularizers_CPU/SplitBregman_TV_core.c | 12 +-----------
 4 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/FGP_TV.c b/main_func/regularizers_CPU/FGP_TV.c
index cfe5b9e..66442c9 100644
--- a/main_func/regularizers_CPU/FGP_TV.c
+++ b/main_func/regularizers_CPU/FGP_TV.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
+#include "matrix.h"
 #include "mex.h"
 #include "FGP_TV_core.h"
 
diff --git a/main_func/regularizers_CPU/LLT_model.c b/main_func/regularizers_CPU/LLT_model.c
index 47146a5..0050654 100644
--- a/main_func/regularizers_CPU/LLT_model.c
+++ b/main_func/regularizers_CPU/LLT_model.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,12 +18,19 @@ limitations under the License.
 */
 
 #include "mex.h"
+#include "matrix.h"
 #include "LLT_model_core.h"
 
 /* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
 *
 * Input Parameters:
 * 1. U0 - origanal noise image/volume
+=======
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model of higher order regularization penalty
+*
+* Input Parameters:
+* 1. U0 - original noise image/volume
+>>>>>>> fix typo and add include "matrix.h"
 * 2. lambda - regularization parameter
 * 3. tau - time-step  for explicit scheme
 * 4. iter - iterations number
@@ -46,7 +53,6 @@ limitations under the License.
 * 28.11.16/Harwell
 */
 
-
 void mexFunction(
         int nlhs, mxArray *plhs[],
         int nrhs, const mxArray *prhs[])
diff --git a/main_func/regularizers_CPU/SplitBregman_TV.c b/main_func/regularizers_CPU/SplitBregman_TV.c
index 0dc638d..38f6a9d 100644
--- a/main_func/regularizers_CPU/SplitBregman_TV.c
+++ b/main_func/regularizers_CPU/SplitBregman_TV.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,6 +18,7 @@ limitations under the License.
 */
 
 #include "mex.h"
+#include <matrix.h>
 #include "SplitBregman_TV_core.h"
 
 /* C-OMP implementation of Split Bregman - TV denoising-regularization model (2D/3D)
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.c b/main_func/regularizers_CPU/SplitBregman_TV_core.c
index 283dd43..4109a4b 100644
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.c
+++ b/main_func/regularizers_CPU/SplitBregman_TV_core.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -257,13 +257,3 @@ float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *B
             }}}
     return 1;
 }
-/* General Functions */
-/*****************************************************************/
-/* Copy Image */
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
-{
-    int j;
-#pragma omp parallel for shared(A, B) private(j)
-    for(j=0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
-    return *B;
-}
\ No newline at end of file
-- 
cgit v1.2.3


From 87fdeb59a0fd3456eabe3ebf045f989f1d833cab Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Fri, 4 Aug 2017 16:13:19 +0100
Subject: removed definition of CopyImage

---
 main_func/regularizers_CPU/FGP_TV_core.c    | 13 ++-----------
 main_func/regularizers_CPU/LLT_model_core.c | 11 ++---------
 2 files changed, 4 insertions(+), 20 deletions(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/FGP_TV_core.c b/main_func/regularizers_CPU/FGP_TV_core.c
index 9cde327..03cd445 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.c
+++ b/main_func/regularizers_CPU/FGP_TV_core.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -263,13 +263,4 @@ float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3,
 	return 1;
 }
 
-/* General Functions */
-/*****************************************************************/
-/* Copy Image */
-float copyIm(float *A, float *B, int dimX, int dimY, int dimZ)
-{
-	int j;
-#pragma omp parallel for shared(A, B) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  B[j] = A[j];
-	return *B;
-}
+
diff --git a/main_func/regularizers_CPU/LLT_model_core.c b/main_func/regularizers_CPU/LLT_model_core.c
index 7a1cdbe..3a853d2 100644
--- a/main_func/regularizers_CPU/LLT_model_core.c
+++ b/main_func/regularizers_CPU/LLT_model_core.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -314,12 +314,5 @@ float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ)
 	return *Map;
 }
 
-/* Copy Image */
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
-{
-	int j;
-#pragma omp parallel for shared(A, U) private(j)
-	for (j = 0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-	return *U;
-}
+
 /*********************3D *********************/
\ No newline at end of file
-- 
cgit v1.2.3


From 28e07bd6fb75d36ac0ee230b894adad9bc6760d4 Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Fri, 4 Aug 2017 16:14:24 +0100
Subject: extern C function called by C++

adds extern "C" to the definition of the functions if the code is called
from C++ rather than from C/Matlab
---
 main_func/regularizers_CPU/FGP_TV_core.h           | 11 ++++--
 main_func/regularizers_CPU/LLT_model_core.h        | 10 ++++--
 main_func/regularizers_CPU/PatchBased_Regul_core.h | 40 +++++++++++++++++++++-
 main_func/regularizers_CPU/SplitBregman_TV_core.h  | 12 +++++--
 main_func/regularizers_CPU/TGV_PD_core.h           | 35 +++++++++++++++++--
 5 files changed, 98 insertions(+), 10 deletions(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/FGP_TV_core.h b/main_func/regularizers_CPU/FGP_TV_core.h
index d256c9a..7e7229a 100644
--- a/main_func/regularizers_CPU/FGP_TV_core.h
+++ b/main_func/regularizers_CPU/FGP_TV_core.h
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +17,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-#include <matrix.h>
+//#include <matrix.h>
 #include <math.h>
 #include <stdlib.h>
 #include <memory.h>
@@ -51,7 +51,9 @@ limitations under the License.
 * D. Kazantsev, 2016-17
 *
 */
-
+#ifdef __cplusplus
+extern "C" {
+#endif
 //float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
 float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
 float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
@@ -64,3 +66,6 @@ float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R
 float Proj_func3D(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ);
 float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int dimX, int dimY, int dimZ);
 float Obj_func_CALC3D(float *A, float *D, float *funcvalA, float lambda, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/main_func/regularizers_CPU/LLT_model_core.h b/main_func/regularizers_CPU/LLT_model_core.h
index 560bb9c..13fce5a 100644
--- a/main_func/regularizers_CPU/LLT_model_core.h
+++ b/main_func/regularizers_CPU/LLT_model_core.h
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +17,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-#include <matrix.h>
+//#include <matrix.h>
 #include <math.h>
 #include <stdlib.h>
 #include <memory.h>
@@ -28,6 +28,9 @@ limitations under the License.
 #define EPS 0.01
 
 /* 2D functions */
+#ifdef __cplusplus
+extern "C" {
+#endif
 float der2D(float *U, float *D1, float *D2, int dimX, int dimY, int dimZ);
 float div_upd2D(float *U0, float *U, float *D1, float *D2, int dimX, int dimY, int dimZ, float lambda, float tau);
 
@@ -38,3 +41,6 @@ float calcMap(float *U, unsigned short *Map, int dimX, int dimY, int dimZ);
 float cleanMap(unsigned short *Map, int dimX, int dimY, int dimZ);
 
 //float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/PatchBased_Regul_core.h b/main_func/regularizers_CPU/PatchBased_Regul_core.h
index 5aa6415..d4a8a46 100644
--- a/main_func/regularizers_CPU/PatchBased_Regul_core.h
+++ b/main_func/regularizers_CPU/PatchBased_Regul_core.h
@@ -19,13 +19,51 @@ limitations under the License.
 
 #define _USE_MATH_DEFINES
 
-#include <matrix.h>
+//#include <matrix.h>
 #include <math.h>
 #include <stdlib.h>
 #include <memory.h>
 #include <stdio.h>
 #include "omp.h"
 
+/* C-OMP implementation of  patch-based (PB) regularization (2D and 3D cases).
+* This method finds self-similar patches in data and performs one fixed point iteration to mimimize the PB penalty function
+*
+* References: 1. Yang Z. & Jacob M. "Nonlocal Regularization of Inverse Problems"
+*             2. Kazantsev D. et al. "4D-CT reconstruction with unified spatial-temporal patch-based regularization"
+*
+* Input Parameters (mandatory):
+* 1. Image (2D or 3D)
+* 2. ratio of the searching window (e.g. 3 = (2*3+1) = 7 pixels window)
+* 3. ratio of the similarity window (e.g. 1 = (2*1+1) = 3 pixels window)
+* 4. h - parameter for the PB penalty function
+* 5. lambda - regularization parameter
+
+* Output:
+* 1. regularized (denoised) Image (N x N)/volume (N x N x N)
+*
+* Quick 2D denoising example in Matlab:
+Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+u0 = Im + .03*randn(size(Im)); u0(u0<0) = 0; % adding noise
+ImDen = PB_Regul_CPU(single(u0), 3, 1, 0.08, 0.05);
+*
+* Please see more tests in a file:
+TestTemporalSmoothing.m
+
+*
+* Matlab + C/mex compilers needed
+* to compile with OMP support: mex PB_Regul_CPU.c CFLAGS="\$CFLAGS -fopenmp -Wall" LDFLAGS="\$LDFLAGS -fopenmp"
+*
+* D. Kazantsev *
+* 02/07/2014
+* Harwell, UK
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
 float pad_crop(float *A, float *Ap, int OldSizeX, int OldSizeY, int OldSizeZ, int NewSizeX, int NewSizeY, int NewSizeZ, int padXY, int switchpad_crop);
 float PB_FUNC2D(float *A, float *B, int dimX, int dimY, int padXY, int SearchW, int SimilW, float h, float lambda);
 float PB_FUNC3D(float *A, float *B, int dimX, int dimY, int dimZ, int padXY, int SearchW, int SimilW, float h, float lambda);
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/SplitBregman_TV_core.h b/main_func/regularizers_CPU/SplitBregman_TV_core.h
index 78aef09..6ed3ff9 100644
--- a/main_func/regularizers_CPU/SplitBregman_TV_core.h
+++ b/main_func/regularizers_CPU/SplitBregman_TV_core.h
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,7 +16,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
-#include <matrix.h>
+//#include <matrix.h>
 #include <math.h>
 #include <stdlib.h>
 #include <memory.h>
@@ -49,6 +49,10 @@ limitations under the License.
 * D. Kazantsev, 2016*
 */
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 //float copyIm(float *A, float *B, int dimX, int dimY, int dimZ);
 float gauss_seidel2D(float *U, float *A, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda, float mu);
 float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, int dimX, int dimY, float lambda);
@@ -59,3 +63,7 @@ float gauss_seidel3D(float *U, float *A, float *Dx, float *Dy, float *Dz, float
 float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
 float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ, float lambda);
 float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int dimX, int dimY, int dimZ);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/main_func/regularizers_CPU/TGV_PD_core.h b/main_func/regularizers_CPU/TGV_PD_core.h
index cbe7ea4..d5378df 100644
--- a/main_func/regularizers_CPU/TGV_PD_core.h
+++ b/main_func/regularizers_CPU/TGV_PD_core.h
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +17,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-#include <matrix.h>
+//#include <matrix.h>
 #include <math.h>
 #include <stdlib.h>
 #include <memory.h>
@@ -25,6 +25,34 @@ limitations under the License.
 #include "omp.h"
 #include "utils.h"
 
+/* C-OMP implementation of Primal-Dual denoising method for
+* Total Generilized Variation (TGV)-L2 model (2D case only)
+*
+* Input Parameters:
+* 1. Noisy image/volume (2D)
+* 2. lambda - regularization parameter
+* 3. parameter to control first-order term (alpha1)
+* 4. parameter to control the second-order term (alpha0)
+* 5. Number of CP iterations
+*
+* Output:
+* Filtered/regularized image
+*
+* Example:
+* figure;
+* Im = double(imread('lena_gray_256.tif'))/255;  % loading image
+* u0 = Im + .03*randn(size(Im)); % adding noise
+* tic; u = PrimalDual_TGV(single(u0), 0.02, 1.3, 1, 550); toc;
+*
+* to compile with OMP support: mex TGV_PD.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+* References:
+* K. Bredies "Total Generalized Variation"
+*
+* 28.11.16/Harwell
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
 /* 2D functions */
 float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, int dimZ, float sigma);
 float ProjP_2D(float *P1, float *P2, int dimX, int dimY, int dimZ, float alpha1);
@@ -34,3 +62,6 @@ float DivProjP_2D(float *U, float *A, float *P1, float *P2, int dimX, int dimY,
 float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, int dimZ, float tau);
 float newU(float *U, float *U_old, int dimX, int dimY, int dimZ);
 //float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
-- 
cgit v1.2.3


From c9834452a55de7e55301fa95ffdf8768694400b3 Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Fri, 4 Aug 2017 16:14:41 +0100
Subject: extern C fix

---
 main_func/regularizers_CPU/utils.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/utils.h b/main_func/regularizers_CPU/utils.h
index c720006..53463a3 100644
--- a/main_func/regularizers_CPU/utils.h
+++ b/main_func/regularizers_CPU/utils.h
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,11 +17,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
-#include <matrix.h>
-#include <math.h>
+//#include <matrix.h>
+//#include <math.h>
 #include <stdlib.h>
 #include <memory.h>
-#include <stdio.h>
+//#include <stdio.h>
 #include "omp.h"
-
+#ifdef __cplusplus
+extern "C" {
+#endif
 float copyIm(float *A, float *U, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
-- 
cgit v1.2.3


From 927e1caa9b1d0df9f4f2eb95170f6bd8be657656 Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Fri, 4 Aug 2017 16:48:41 +0100
Subject: typo and include matrix.h

---
 main_func/regularizers_CPU/PatchBased_Regul.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/PatchBased_Regul.c b/main_func/regularizers_CPU/PatchBased_Regul.c
index 59eb3b3..9c925df 100644
--- a/main_func/regularizers_CPU/PatchBased_Regul.c
+++ b/main_func/regularizers_CPU/PatchBased_Regul.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,6 +18,7 @@ limitations under the License.
 */
 
 #include "mex.h"
+#include "matrix.h"
 #include "PatchBased_Regul_core.h"
 
 
-- 
cgit v1.2.3


From c20554d5bec0168a54a6c472c0cc2f378511eccc Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Mon, 7 Aug 2017 16:46:51 +0100
Subject: typo

---
 main_func/regularizers_CPU/TGV_PD.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/TGV_PD.c b/main_func/regularizers_CPU/TGV_PD.c
index 6593d8e..c9cb440 100644
--- a/main_func/regularizers_CPU/TGV_PD.c
+++ b/main_func/regularizers_CPU/TGV_PD.c
@@ -3,7 +3,7 @@ This work is part of the Core Imaging Library developed by
 Visual Analytics and Imaging System Group of the Science Technology
 Facilities Council, STFC
 
-Copyright 2017 Daniil Kazanteev
+Copyright 2017 Daniil Kazantsev
 Copyright 2017 Srikanth Nagella, Edoardo Pasca
 
 Licensed under the Apache License, Version 2.0 (the "License");
-- 
cgit v1.2.3


From 8094acfff8659e5c959066495936a8d5aff56fd5 Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Mon, 7 Aug 2017 16:47:28 +0100
Subject: removed copyIm

---
 main_func/regularizers_CPU/TGV_PD_core.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'main_func')

diff --git a/main_func/regularizers_CPU/TGV_PD_core.c b/main_func/regularizers_CPU/TGV_PD_core.c
index 1164b73..4139d10 100644
--- a/main_func/regularizers_CPU/TGV_PD_core.c
+++ b/main_func/regularizers_CPU/TGV_PD_core.c
@@ -186,14 +186,6 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2,
         }}
     return 1;
 }
-/* Copy Image */
-float copyIm(float *A, float *U, int dimX, int dimY, int dimZ)
-{
-    int j;
-#pragma omp parallel for shared(A, U) private(j)
-    for(j=0; j<dimX*dimY*dimZ; j++)  U[j] = A[j];
-    return *U;
-}
 /*********************3D *********************/
 
 /*Calculating dual variable P (using forward differences)*/
-- 
cgit v1.2.3