Merge pull request #49 from vais-ral/dTV

dTV regulariser (2D/3D CPU/GPU)
author: Daniil Kazantsev <dkazanc3@googlemail.com> 2018-04-12 12:09:38 +0100
committer: GitHub <noreply@github.com> 2018-04-12 12:09:38 +0100
commit: 7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69 (patch)
tree: 225dcf0db9dc7e0f0fc5fc001a7efb14c19658f8
parent: aa99eb8a9bd47ecd6e4d3d1e8c9f0cfbefb4f7bb (diff)
parent: 22f6e22cbe6db04c6bbe8d259ce761e3748d7102 (diff)
download: regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.tar.gz
regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.tar.bz2
regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.tar.xz
regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.zip
25 files changed, 2128 insertions, 138 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt
index 3bc5ceb..26912b9 100644
--- a/Core/CMakeLists.txt
+++ b/Core/CMakeLists.txt
@@ -89,6 +89,7 @@ add_library(cilreg SHARED
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SplitBregman_TV_core.c
 	    #${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_PD_core.c
         ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
 	    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/utils.c
 	    )
 target_link_libraries(cilreg ${EXTRA_LIBRARIES} )
@@ -129,6 +130,7 @@ if (CUDA_FOUND)
   CUDA_ADD_LIBRARY(cilregcuda SHARED
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu
 	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu
+	${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
   )
   if (UNIX)
     message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
diff --git a/Core/regularisers_CPU/FGP_dTV_core.c b/Core/regularisers_CPU/FGP_dTV_core.c
new file mode 100644
index 0000000..f6b4f79
--- /dev/null
+++ b/Core/regularisers_CPU/FGP_dTV_core.c
@@ -0,0 +1,440 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "FGP_dTV_core.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+ 
+/* Runs the FGP-dTV iterations on the CPU for a 2D image (dimZ <= 1) or a 3D volume.
+ *
+ * Input          - noisy image/volume, only read by this routine
+ * InputRef       - reference image/volume; only its normalised gradient field is used
+ * Output         - result buffer, rewritten on every iteration
+ * lambdaPar      - regularisation parameter
+ * iterationsNumb - maximum number of iterations
+ * epsil          - tolerance on the relative change ||Output - Output_prev|| / ||Output||;
+ *                  the loop ends once this has been undercut more than 4 times in total
+ * eta            - smoothing constant for the reference gradient magnitude
+ * methodTV       - 0 selects the isotropic projection, any other value the anisotropic one
+ * nonneg         - 1 clamps negative Output values to zero after each update
+ * printM         - 1 prints the iteration index at which the loop ended
+ * dimX/dimY/dimZ - dimensions; element (i,j,k) lives at (dimX*dimY)*k + j*dimX + i
+ *
+ * Returns the first element of Output (*Output). If a work buffer cannot be
+ * allocated, a message is written to stderr and no iterations are performed.
+ */
+float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll, j, DimTotal;
+    float re, re1;
+    float tk = 1.0f;
+    float tkp1 = 1.0f;
+    int count = 0;
+
+    if (dimZ <= 1) {
+        /* 2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL;
+        DimTotal = dimX*dimY;
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        InputRef_x = calloc(DimTotal, sizeof(float));
+        InputRef_y = calloc(DimTotal, sizeof(float));
+
+        /* calloc may return NULL for a zero-sized request, so only a non-empty image counts as a failure */
+        if (DimTotal > 0 && (Output_prev == NULL || P1 == NULL || P2 == NULL || P1_prev == NULL || P2_prev == NULL ||
+                             R1 == NULL || R2 == NULL || InputRef_x == NULL || InputRef_y == NULL)) {
+            fprintf(stderr, "FGP-dTV: failed to allocate work buffers\n");
+        }
+        else {
+            /* calculate gradient field (smoothed) for the reference image */
+            GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, dimX, dimY);
+
+            /* begin iterations */
+            for(ll=0; ll<iterationsNumb; ll++) {
+
+                /* project the 2D vector field (R1,R2) onto the orthogonal complement of (InputRef_x,InputRef_y) */
+                ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, dimX, dimY);
+
+                /* computing the gradient of the objective function */
+                Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, dimX, dimY);
+
+                /* apply nonnegativity */
+                if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+                /* taking a step towards minus of the gradient */
+                Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, dimX, dimY);
+
+                /* projection step */
+                Proj_dfunc2D(P1, P2, methodTV, DimTotal);
+
+                /* updating R and t */
+                tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+                Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+                /* check early stopping criteria: relative change of Output */
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += pow(Output[j] - Output_prev[j],2);
+                    re1 += pow(Output[j],2);
+                }
+                re = sqrt(re)/sqrt(re1);
+                /* count is cumulative: it is never reset when the residual rises again */
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                /* storing old values */
+                copyIm(Output, Output_prev, dimX, dimY, 1);
+                copyIm(P1, P1_prev, dimX, dimY, 1);
+                copyIm(P2, P2_prev, dimX, dimY, 1);
+                tk = tkp1;
+            }
+            if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);
+        }
+        /* free(NULL) is a no-op, so this is safe after a partial allocation */
+        free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
+    }
+    else {
+        /* 3D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL;
+        DimTotal = dimX*dimY*dimZ;
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P3 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        P3_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        R3 = calloc(DimTotal, sizeof(float));
+        InputRef_x = calloc(DimTotal, sizeof(float));
+        InputRef_y = calloc(DimTotal, sizeof(float));
+        InputRef_z = calloc(DimTotal, sizeof(float));
+
+        /* calloc may return NULL for a zero-sized request, so only a non-empty volume counts as a failure */
+        if (DimTotal > 0 && (Output_prev == NULL || P1 == NULL || P2 == NULL || P3 == NULL ||
+                             P1_prev == NULL || P2_prev == NULL || P3_prev == NULL ||
+                             R1 == NULL || R2 == NULL || R3 == NULL ||
+                             InputRef_x == NULL || InputRef_y == NULL || InputRef_z == NULL)) {
+            fprintf(stderr, "FGP-dTV: failed to allocate work buffers\n");
+        }
+        else {
+            /* calculate gradient field (smoothed) for the reference volume */
+            GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ);
+
+            /* begin iterations */
+            for(ll=0; ll<iterationsNumb; ll++) {
+
+                /* project the 3D vector field (R1,R2,R3) onto the orthogonal complement of (InputRef_x,InputRef_y,InputRef_z) */
+                ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ);
+
+                /* computing the gradient of the objective function */
+                Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, dimX, dimY, dimZ);
+
+                /* apply nonnegativity */
+                if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+                /* taking a step towards minus of the gradient */
+                Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, dimX, dimY, dimZ);
+
+                /* projection step */
+                Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal);
+
+                /* updating R and t */
+                tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+                Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+
+                /* calculate norm - stopping rules */
+                re = 0.0f; re1 = 0.0f;
+                for(j=0; j<DimTotal; j++)
+                {
+                    re += pow(Output[j] - Output_prev[j],2);
+                    re1 += pow(Output[j],2);
+                }
+                re = sqrt(re)/sqrt(re1);
+                /* stop once the residual has been below the tolerance more than 4 times (cumulative) */
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                /* storing old values */
+                copyIm(Output, Output_prev, dimX, dimY, dimZ);
+                copyIm(P1, P1_prev, dimX, dimY, dimZ);
+                copyIm(P2, P2_prev, dimX, dimY, dimZ);
+                copyIm(P3, P3_prev, dimX, dimY, dimZ);
+                tk = tkp1;
+            }
+            if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);
+        }
+        /* free(NULL) is a no-op, so this is safe after a partial allocation */
+        free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); free(InputRef_x); free(InputRef_y); free(InputRef_z);
+    }
+    return *Output;
+}
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+
+/* Fills (B_x, B_y) with the forward-difference gradient of B divided by its
+ * eta-smoothed magnitude sqrt(gx^2 + gy^2 + eta^2).
+ * The neighbour past the last column/row is taken to be 0.
+ * Pixel (i,j) is stored at j*dimX + i. Always returns 1.
+ */
+float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, int dimX, int dimY)
+{
+    int i, j;
+#pragma omp parallel for shared(B, B_x, B_y) private(i, j)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            const int idx = j*dimX + i;
+            const float centre = B[idx];
+            /* zero boundary conditions on the forward neighbours */
+            const float nextX = (i == dimX-1) ? 0.0f : B[idx + 1];
+            const float nextY = (j == dimY-1) ? 0.0f : B[idx + dimX];
+            const float dx = nextX - centre;
+            const float dy = nextY - centre;
+            float norm = pow(dx,2) + pow(dy,2);
+            norm = sqrt(norm + pow(eta,2)); /* eta keeps the divisor away from zero */
+            B_x[idx] = dx/norm;
+            B_y[idx] = dy/norm;
+        }
+    }
+    return 1;
+}
+
+/* In place, removes from the vector field (R1, R2) its component along
+ * (B_x, B_y) at every pixel: R <- R - <R,B> B.
+ * Pixel (i,j) is stored at j*dimX + i. Always returns 1.
+ */
+float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX, int dimY)
+{
+    int i, j;
+#pragma omp parallel for shared(R1, R2, B_x, B_y) private(i, j)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            const int idx = j*dimX + i;
+            /* per-pixel inner product of R and B */
+            const float dot = R1[idx]*B_x[idx] + R2[idx]*B_y[idx];
+            R1[idx] = R1[idx] - dot*B_x[idx];
+            R2[idx] = R2[idx] - dot*B_y[idx];
+        }
+    }
+    return 1;
+}
+
+/* Writes D = A - lambda * (backward difference of R1 along x + backward
+ * difference of R2 along y); the neighbour before the first column/row is
+ * taken to be 0. Pixel (i,j) is stored at j*dimX + i.
+ * Returns the first element of D.
+ */
+float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY)
+{
+    int i, j;
+#pragma omp parallel for shared(A,D,R1,R2) private(i, j)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            const int idx = j*dimX + i;
+            /* boundary conditions */
+            const float prevX = (i == 0) ? 0.0f : R1[idx - 1];
+            const float prevY = (j == 0) ? 0.0f : R2[idx - dimX];
+            D[idx] = A[idx] - lambda*(R1[idx] + R2[idx] - prevX - prevY);
+        }
+    }
+    return *D;
+}
+/* Computes P = R + (1/(8*lambda)) * g, where g is the forward difference
+ * (D[idx] - D[neighbour], 0 on the last column/row) with its component
+ * along (B_x, B_y) removed. Pixel (i,j) is stored at j*dimX + i.
+ * Always returns 1.
+ */
+float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY)
+{
+    int i, j;
+    float step = (1.0f/(8.0f*lambda));
+#pragma omp parallel for shared(P1,P2,D,R1,R2,B_x,B_y,step) private(i, j)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            const int idx = j*dimX + i;
+            /* boundary conditions */
+            float dx = (i == dimX-1) ? 0.0f : D[idx] - D[idx + 1];
+            float dy = (j == dimY-1) ? 0.0f : D[idx] - D[idx + dimX];
+            /* inner product with the reference direction */
+            const float dot = dx*B_x[idx] + dy*B_y[idx];
+
+            dx = dx - dot*B_x[idx];
+            dy = dy - dot*B_y[idx];
+
+            P1[idx] = R1[idx] + step*dx;
+            P2[idx] = R2[idx] + step*dy;
+        }
+    }
+    return 1;
+}
+/* Projects the dual field (P1, P2) in place.
+ * methTV == 0 (isotropic): vectors whose squared length exceeds 1 are scaled to unit length.
+ * otherwise (anisotropic): each component is divided by max(|component|, 1).
+ * Always returns 1.
+ */
+float Proj_dfunc2D(float *P1, float *P2, int methTV, int DimTotal)
+{
+    int i;
+    if (methTV == 0) {
+        /* isotropic TV */
+#pragma omp parallel for shared(P1,P2) private(i)
+        for (i = 0; i < DimTotal; i++) {
+            const float len_sq = powf(P1[i],2) + powf(P2[i],2);
+            if (len_sq > 1.0f) {
+                const float inv_len = 1.0f/sqrtf(len_sq);
+                P1[i] = P1[i]*inv_len;
+                P2[i] = P2[i]*inv_len;
+            }
+        }
+    }
+    else {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2) private(i)
+        for (i = 0; i < DimTotal; i++) {
+            float div1 = fabs(P1[i]);
+            float div2 = fabs(P2[i]);
+            /* components already inside [-1, 1] are divided by 1, i.e. left as they are */
+            if (div1 < 1.0f) {div1 = 1.0f;}
+            if (div2 < 1.0f) {div2 = 1.0f;}
+            P1[i] = P1[i]/div1;
+            P2[i] = P2[i]/div2;
+        }
+    }
+    return 1;
+}
+/* Extrapolation step: R = P + ((tk - 1)/tkp1) * (P - P_old), applied to both
+ * components over DimTotal elements. Always returns 1.
+ */
+float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal)
+{
+    int n;
+    float weight = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2,weight) private(n)
+    for (n = 0; n < DimTotal; n++) {
+        R1[n] = P1[n] + weight*(P1[n] - P1_old[n]);
+        R2[n] = P2[n] + weight*(P2[n] - P2_old[n]);
+    }
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* Fills (B_x, B_y, B_z) with the forward-difference gradient of B divided by
+ * its eta-smoothed magnitude sqrt(gx^2 + gy^2 + gz^2 + eta^2).
+ * The neighbour past the last index along each axis is taken to be 0.
+ * Voxel (i,j,k) is stored at (dimX*dimY)*k + j*dimX + i. Always returns 1.
+ */
+float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, int dimX, int dimY, int dimZ)
+{
+    int i, j, k;
+#pragma omp parallel for shared(B, B_x, B_y, B_z) private(i, j, k)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const int idx = (dimX*dimY)*k + j*dimX + i;
+                const float centre = B[idx];
+                /* zero boundary conditions on the forward neighbours */
+                const float nextX = (i == dimX-1) ? 0.0f : B[idx + 1];
+                const float nextY = (j == dimY-1) ? 0.0f : B[idx + dimX];
+                const float nextZ = (k == dimZ-1) ? 0.0f : B[idx + dimX*dimY];
+                const float dx = nextX - centre;
+                const float dy = nextY - centre;
+                const float dz = nextZ - centre;
+                float norm = pow(dx,2) + pow(dy,2) + pow(dz,2);
+                norm = sqrt(norm + pow(eta,2)); /* eta keeps the divisor away from zero */
+                B_x[idx] = dx/norm;
+                B_y[idx] = dy/norm;
+                B_z[idx] = dz/norm;
+            }
+        }
+    }
+    return 1;
+}
+
+/* In place, removes from the vector field (R1, R2, R3) its component along
+ * (B_x, B_y, B_z) at every voxel: R <- R - <R,B> B.
+ * Voxel (i,j,k) is stored at (dimX*dimY)*k + j*dimX + i. Always returns 1.
+ */
+float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, int dimX, int dimY, int dimZ)
+{
+    int i, j, k;
+#pragma omp parallel for shared(R1, R2, R3, B_x, B_y, B_z) private(i, j, k)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const int idx = (dimX*dimY)*k + j*dimX + i;
+                /* per-voxel inner product of R and B */
+                const float dot = R1[idx]*B_x[idx] + R2[idx]*B_y[idx] + R3[idx]*B_z[idx];
+                R1[idx] = R1[idx] - dot*B_x[idx];
+                R2[idx] = R2[idx] - dot*B_y[idx];
+                R3[idx] = R3[idx] - dot*B_z[idx];
+            }
+        }
+    }
+    return 1;
+}
+
+/* Writes D = A - lambda * (sum of the backward differences of R1, R2, R3
+ * along x, y, z); the neighbour before the first index along each axis is
+ * taken to be 0. Voxel (i,j,k) is stored at (dimX*dimY)*k + j*dimX + i.
+ * Returns the first element of D.
+ */
+float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ)
+{
+    int i, j, k;
+#pragma omp parallel for shared(A,D,R1,R2,R3) private(i, j, k)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const int idx = (dimX*dimY)*k + j*dimX + i;
+                /* boundary conditions */
+                const float prevX = (i == 0) ? 0.0f : R1[idx - 1];
+                const float prevY = (j == 0) ? 0.0f : R2[idx - dimX];
+                const float prevZ = (k == 0) ? 0.0f : R3[idx - dimX*dimY];
+                D[idx] = A[idx] - lambda*(R1[idx] + R2[idx] + R3[idx] - prevX - prevY - prevZ);
+            }
+        }
+    }
+    return *D;
+}
+/* Computes P = R + (1/(26*lambda)) * g, where g is the forward difference
+ * (D[idx] - D[neighbour], 0 on the last index of each axis) with its
+ * component along (B_x, B_y, B_z) removed.
+ * Voxel (i,j,k) is stored at (dimX*dimY)*k + j*dimX + i. Always returns 1.
+ */
+float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ)
+{
+    int i, j, k;
+    float step = (1.0f/(26.0f*lambda));
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3,B_x,B_y,B_z,step) private(i, j, k)
+    for (i = 0; i < dimX; i++) {
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const int idx = (dimX*dimY)*k + j*dimX + i;
+                /* boundary conditions */
+                float dx = (i == dimX-1) ? 0.0f : D[idx] - D[idx + 1];
+                float dy = (j == dimY-1) ? 0.0f : D[idx] - D[idx + dimX];
+                float dz = (k == dimZ-1) ? 0.0f : D[idx] - D[idx + dimX*dimY];
+                /* inner product with the reference direction */
+                const float dot = dx*B_x[idx] + dy*B_y[idx] + dz*B_z[idx];
+
+                dx = dx - dot*B_x[idx];
+                dy = dy - dot*B_y[idx];
+                dz = dz - dot*B_z[idx];
+
+                P1[idx] = R1[idx] + step*dx;
+                P2[idx] = R2[idx] + step*dy;
+                P3[idx] = R3[idx] + step*dz;
+            }
+        }
+    }
+    return 1;
+}
+/* Projects the dual field (P1, P2, P3) in place.
+ * methTV == 0 (isotropic): vectors whose squared length exceeds 1 are scaled to unit length.
+ * otherwise (anisotropic): each component is divided by max(|component|, 1).
+ * Always returns 1.
+ *
+ * All per-element temporaries are declared inside the loop bodies so every
+ * OpenMP thread owns its copy; the squared length used to be a shared
+ * variable in the isotropic branch, which was a data race.
+ */
+float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, int DimTotal)
+{
+    int i;
+    if (methTV == 0) {
+        /* isotropic TV */
+#pragma omp parallel for shared(P1,P2,P3) private(i)
+        for (i = 0; i < DimTotal; i++) {
+            const float denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                const float sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
+    else {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2,P3) private(i)
+        for (i = 0; i < DimTotal; i++) {
+            float val1 = fabs(P1[i]);
+            float val2 = fabs(P2[i]);
+            float val3 = fabs(P3[i]);
+            /* components already inside [-1, 1] are divided by 1, i.e. left as they are */
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
+    return 1;
+}
+/* Extrapolation step: R = P + ((tk - 1)/tkp1) * (P - P_old), applied to all
+ * three components over DimTotal elements. Always returns 1.
+ */
+float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal)
+{
+    int n;
+    float weight = ((tk-1.0f)/tkp1);
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3,weight) private(n)
+    for (n = 0; n < DimTotal; n++) {
+        R1[n] = P1[n] + weight*(P1[n] - P1_old[n]);
+        R2[n] = P2[n] + weight*(P2[n] - P2_old[n]);
+        R3[n] = P3[n] + weight*(P3[n] - P3_old[n]);
+    }
+    return 1;
+}
diff --git a/Core/regularisers_CPU/FGP_dTV_core.h b/Core/regularisers_CPU/FGP_dTV_core.h
new file mode 100644
index 0000000..95dc249
--- /dev/null
+++ b/Core/regularisers_CPU/FGP_dTV_core.h
@@ -0,0 +1,72 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+ 
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Main entry point; exported like the helper routines below so that the
+ * symbol is declared consistently with the rest of this header. */
+CCPI_EXPORT float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+/* 2D helpers: reference gradient field, projection of R, objective update,
+ * gradient step, dual projection and extrapolation of R. */
+CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, int dimX, int dimY);
+CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, int dimX, int dimY);
+CCPI_EXPORT float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, int dimX, int dimY);
+CCPI_EXPORT float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, int dimX, int dimY);
+CCPI_EXPORT float Proj_dfunc2D(float *P1, float *P2, int methTV, int DimTotal);
+CCPI_EXPORT float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, int DimTotal);
+
+/* 3D counterparts of the helpers above. */
+CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, int DimTotal);
+CCPI_EXPORT float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, int DimTotal);
+#ifdef __cplusplus
+}
+#endif
diff --git a/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index 314a367..3fbbcde 100755
--- a/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -417,14 +417,14 @@ extern "C" void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, in
             checkCudaErrors(cudaPeekAtLastError() );
         
             if (epsil != 0.0f) {
-                /* calculate norm - stopping rules using the Thrust library */					
+                /* calculate norm - stopping rules using the Thrust library */
                 ResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
                 checkCudaErrors( cudaDeviceSynchronize() );
                 checkCudaErrors(cudaPeekAtLastError() );               
                 
-                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);  		
-                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));		
-                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);  		
+                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize); 
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
                 float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
                     
                 re = (reduction/reduction2);      
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
new file mode 100644
index 0000000..04047a5
--- /dev/null
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -0,0 +1,751 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ 
+
+#include "dTV_FGP_GPU_core.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is kept fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity constraint (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+ 
+
+// Wraps a CUDA host call: on failure the error string is printed with file/line and the process exits
+#define checkCudaErrors(err)           __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+    if (err == cudaSuccess)
+        return; // nothing to report
+
+    // report where the failing call was issued, then terminate the process
+    fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, (int)err, cudaGetErrorString(err));
+    exit(EXIT_FAILURE);
+}
+
+#define BLKXSIZE2D 16 /* thread-block extent along x for the 2D kernel launches */
+#define BLKYSIZE2D 16 /* thread-block extent along y for the 2D kernel launches */
+
+#define BLKXSIZE 8 /* thread-block extents along x, y and z for the 3D kernel launches */
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+/* idivup(a, b): a/b plus one when a remainder is left; square: functor returning x*x, passed to thrust::transform_reduce */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+
+__global__ void GradNorm_func2D_kernel(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize)
+{
+    /* Stores the forward-difference gradient of the reference image Refd, divided by
+     * sqrt(|grad|^2 + eta^2), into Refd_x / Refd_y. One thread handles one pixel.
+     * N is the row stride (x extent), M the y extent; ImSize is not used here. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+    const int index = col + N*row;
+    const float centre = Refd[index];
+
+    /* the neighbour past the last column/row is taken as zero */
+    const float right = (col >= N-1) ? 0.0f : Refd[(col+1) + N*row];
+    const float below = (row >= M-1) ? 0.0f : Refd[col + N*(row + 1)];
+
+    const float gradX = right - centre;
+    const float gradY = below - centre;
+    float magn = pow(gradX,2) + pow(gradY,2);
+    magn = sqrt(magn + pow(eta,2));
+    Refd_x[index] = gradX/magn;
+    Refd_y[index] = gradY/magn;
+}
+
+__global__ void ProjectVect_func2D_kernel(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize)
+{
+    /* Removes from the vector (R1,R2) at every pixel its component along (Refd_x,Refd_y):
+     * R <- R - <R,Refd>*Refd, updating R1 and R2 in place. ImSize is not used here. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+    const float dirX = Refd_x[index];
+    const float dirY = Refd_y[index];
+    const float in_prod = R1[index]*dirX + R2[index]*dirY;   /* inner product <R,Refd> */
+    R1[index] = R1[index] - in_prod*dirX;
+    R2[index] = R2[index] - in_prod*dirY;
+}
+
+
+__global__ void Obj_dfunc2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+{
+    /* D = Ad - lambda*(backward difference of R1 along x + backward difference of R2 along y),
+     * evaluated per pixel of the N x M image (N is the row stride).
+     * ImSize is not used here. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    /* the neighbour before the first column/row is taken as zero */
+    const float left  = (col <= 0) ? 0.0f : R1[(col-1) + N*row];
+    const float above = (row <= 0) ? 0.0f : R2[col + N*(row-1)];
+
+    /* store the result in global memory */
+    D[index] = Ad[index] - lambda*(R1[index] + R2[index] - left - above);
+}
+
+__global__ void Grad_dfunc2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2,  float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip)
+{
+    /* P = R + multip * proj(-grad D): the negated forward difference of D is stripped of its
+     * component along (Refd_x,Refd_y), scaled by multip and added to (R1,R2).
+     * ImSize is not used here. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+    const float dirX = Refd_x[index];
+    const float dirY = Refd_y[index];
+
+    /* differences are set to zero on the last column/row */
+    float diffX = (col >= N-1) ? 0.0f : D[index] - D[(col+1) + N*row];
+    float diffY = (row >= M-1) ? 0.0f : D[index] - D[col + N*(row + 1)];
+
+    const float in_prod = diffX*dirX + diffY*dirY;   /* inner product with the reference direction */
+    diffX = diffX - in_prod*dirX;
+    diffY = diffY - in_prod*dirY;
+
+    /* store the result in global memory */
+    P1[index] = R1[index] + multip*diffX;
+    P2[index] = R2[index] + multip*diffY;
+}
+
+__global__ void Proj_dfunc2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    /* Isotropic projection: wherever the squared length of (P1,P2) exceeds one the vector is
+     * divided by its length; shorter vectors are left untouched.
+     * ImSize is not used here. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+    const float denom = pow(P1[index],2) +  pow(P2[index],2);
+
+    if (!(denom > 1.0f)) return;   /* nothing to rescale (this branch is also taken for NaN) */
+
+    P1[index] = P1[index]/sqrt(denom);
+    P2[index] = P2[index]/sqrt(denom);
+}
+__global__ void Proj_dfunc2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    /* Anisotropic projection: P1 and P2 are each divided by their own magnitude whenever that
+     * magnitude is at least one, and by one otherwise. ImSize is not used here. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    /* per-component divisors, raised to one when the magnitude is smaller */
+    float scale1 = abs(P1[index]);
+    float scale2 = abs(P2[index]);
+    scale1 = (scale1 < 1.0f) ? 1.0f : scale1;
+    scale2 = (scale2 < 1.0f) ? 1.0f : scale2;
+    P1[index] = P1[index]/scale1;
+    P2[index] = P2[index]/scale2;
+}
+__global__ void Rupd_dfunc2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+{
+    /* Extrapolation step R = P + multip2*(P - P_old), applied to both components.
+     * tkp1, tk and ImSize are accepted but not used by this kernel. */
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads whose coordinates lie outside the N x M image do nothing */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+    R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+    R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+}
+__global__ void dTVnonneg2D_kernel(float* Output, int N, int M, int num_total)
+{
+    /* Clamps negative entries of the N x M image Output to zero, in place. The guard tests the
+     * coordinates too: a thread with xIndex >= N would alias a pixel owned by another thread. */
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+__global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    /* Copies the N x M image Input into Output element by element. The guard tests the
+     * coordinates too: a thread with xIndex >= N would alias a pixel owned by another thread. */
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+__global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    /* Output = Input1 - Input2, element by element over the N x M image. The guard tests the
+     * coordinates too: a thread with xIndex >= N would alias a pixel owned by another thread. */
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+    const int index = xIndex + N*yIndex;
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+__global__ void GradNorm_func3D_kernel(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize)
+{
+    /* Stores the forward-difference gradient of the reference volume Refd, divided by
+     * sqrt(|grad|^2 + eta^2), into Refd_x / Refd_y / Refd_z. One thread handles one voxel
+     * of the N x M x Z volume (x fastest, then y, then z). ImSize is not used here. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+    const int index = (N*M)*k + i + N*j;
+    const float centre = Refd[index];
+
+    /* the neighbour past the last index along each axis is taken as zero */
+    const float nextX = (i >= N-1) ? 0.0f : Refd[(N*M)*k + (i+1) + N*j];
+    const float nextY = (j >= M-1) ? 0.0f : Refd[(N*M)*k + i + N*(j+1)];
+    const float nextZ = (k >= Z-1) ? 0.0f : Refd[(N*M)*(k+1) + i + N*j];
+
+    const float gradX = nextX - centre;
+    const float gradY = nextY - centre;
+    const float gradZ = nextZ - centre;
+    float magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
+    magn = sqrt(magn + pow(eta,2));
+    Refd_x[index] = gradX/magn;
+    Refd_y[index] = gradY/magn;
+    Refd_z[index] = gradZ/magn;
+}
+
+__global__ void ProjectVect_func3D_kernel(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize)
+{
+    /* Removes from the vector (R1,R2,R3) at every voxel its component along
+     * (Refd_x,Refd_y,Refd_z): R <- R - <R,Refd>*Refd, in place. ImSize is not used here. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int index = (N*M)*k + i + N*j;
+    const float dirX = Refd_x[index];
+    const float dirY = Refd_y[index];
+    const float dirZ = Refd_z[index];
+    const float in_prod = R1[index]*dirX + R2[index]*dirY + R3[index]*dirZ; /* inner product <R,Refd> */
+    R1[index] = R1[index] - in_prod*dirX;
+    R2[index] = R2[index] - in_prod*dirY;
+    R3[index] = R3[index] - in_prod*dirZ;
+}
+
+
+__global__ void Obj_dfunc3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+{
+    /* D = Ad - lambda*(backward differences of R1 along x, R2 along y and R3 along z, summed),
+     * evaluated per voxel. ImSize is not used here. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int index = (N*M)*k + i + N*j;
+
+    /* the neighbour before the first index along each axis is taken as zero */
+    const float prevX = (i <= 0) ? 0.0f : R1[(N*M)*(k) + (i-1) + N*j];
+    const float prevY = (j <= 0) ? 0.0f : R2[(N*M)*(k) + i + N*(j-1)];
+    const float prevZ = (k <= 0) ? 0.0f : R3[(N*M)*(k-1) + i + N*j];
+
+    /* store the result in global memory */
+    D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - prevX - prevY - prevZ);
+}
+
+__global__ void Grad_dfunc3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip)
+{
+    /* P = R + multip * proj(-grad D): the negated forward difference of D is stripped of its
+     * component along (Refd_x,Refd_y,Refd_z), scaled by multip and added to (R1,R2,R3).
+     * ImSize is not used here. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+    const int index = (N*M)*k + i + N*j;
+    const float dirX = Refd_x[index];
+    const float dirY = Refd_y[index];
+    const float dirZ = Refd_z[index];
+
+    /* differences are set to zero at the last index along each axis */
+    float diffX = (i >= N-1) ? 0.0f : D[index] - D[(N*M)*(k) + (i+1) + N*j];
+    float diffY = (j >= M-1) ? 0.0f : D[index] - D[(N*M)*(k) + i + N*(j+1)];
+    float diffZ = (k >= Z-1) ? 0.0f : D[index] - D[(N*M)*(k+1) + i + N*j];
+
+    const float in_prod = diffX*dirX + diffY*dirY + diffZ*dirZ;   /* inner product with the reference direction */
+    diffX = diffX - in_prod*dirX;
+    diffY = diffY - in_prod*dirY;
+    diffZ = diffZ - in_prod*dirZ;
+    /* store the result in global memory */
+    P1[index] = R1[index] + multip*diffX;
+    P2[index] = R2[index] + multip*diffY;
+    P3[index] = R3[index] + multip*diffZ;
+}
+
+__global__ void Proj_dfunc3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    /* Isotropic projection: wherever the squared length of (P1,P2,P3) exceeds one the vector
+     * is scaled by the reciprocal of its length; shorter vectors are left untouched.
+     * ImSize is not used here. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int index = (N*M)*k + i + N*j;
+    const float denom = pow(P1[index],2) +  pow(P2[index],2) + pow(P3[index],2);
+
+    if (!(denom > 1.0f)) return;   /* nothing to rescale (this branch is also taken for NaN) */
+
+    /* one reciprocal shared by the three components */
+    const float sq_denom = 1.0f/sqrt(denom);
+    P1[index] = P1[index]*sq_denom;
+    P2[index] = P2[index]*sq_denom;
+    P3[index] = P3[index]*sq_denom;
+}
+
+__global__ void Proj_dfunc3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    /* Anisotropic projection: P1, P2 and P3 are each divided by their own magnitude whenever
+     * that magnitude is at least one, and by one otherwise. ImSize is not used here. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int index = (N*M)*k + i + N*j;
+    /* per-component divisors, raised to one when the magnitude is smaller */
+    float scale1 = abs(P1[index]);
+    float scale2 = abs(P2[index]);
+    float scale3 = abs(P3[index]);
+    scale1 = (scale1 < 1.0f) ? 1.0f : scale1;
+    scale2 = (scale2 < 1.0f) ? 1.0f : scale2;
+    scale3 = (scale3 < 1.0f) ? 1.0f : scale3;
+
+    P1[index] = P1[index]/scale1;
+    P2[index] = P2[index]/scale2;
+    P3[index] = P3[index]/scale3;
+}
+
+
+__global__ void Rupd_dfunc3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+{
+    /* Extrapolation step R = P + multip2*(P - P_old), applied to all three components.
+     * tkp1, tk and ImSize are accepted but not used by this kernel. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads whose coordinates lie outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int index = (N*M)*k + i + N*j;
+    R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+    R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+    R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]);
+}
+
+__global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+{
+    /* Clamps negative entries of the N x M x Z volume Output to zero, in place. The guard tests the
+     * coordinates too: a thread with i >= N or j >= M would alias a voxel owned by another thread. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (N*M)*k + i + N*j;
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+
+__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    /* Copies the N x M x Z volume Input into Output element by element. The guard tests the
+     * coordinates too: a thread with i >= N or j >= M would alias a voxel owned by another thread. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (N*M)*k + i + N*j;
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+
+__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    /* Output = Input1 - Input2, element by element over the N x M x Z volume. The guard tests the
+     * coordinates too: a thread with i >= N or j >= M would alias a voxel owned by another thread. */
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+    const int index = (N*M)*k + i + N*j;
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+
+/* MAIN HOST FUNCTION: runs at most iter FGP-dTV iterations on Input, guided by InputRef, and copies the result to Output (2D path when dimZ <= 1, 3D path otherwise) */
+extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = -1; // number of devices
+    /* a failed query and a non-positive count are both treated as "no usable device" */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return;
+    }
+    /* count: number of iterations whose relative change fell below epsil; tk, tkp1: momentum terms t_k and t_(k+1) */
+    int count = 0, i;
+    float re, multip, multip2;
+    float tk = 1.0f;
+    float tkp1 = 1.0f;
+
+    if (dimZ <= 1) {
+        /* 2D version */
+        int ImSize = dimX*dimY;
+        float *d_input=NULL, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *d_InputRef=NULL;
+
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        /* allocate space for images on device (d_update_prev only when the tolerance test is enabled) */
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        if (epsil != 0.0f) checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) ); /* first residual is taken against zeros, not uninitialised memory */
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_x, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_y, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemcpy(d_update,d_input,ImSize*sizeof(float),cudaMemcpyDeviceToDevice)); /* Output equals Input when no iteration runs */
+        /******************** Run CUDA 2D kernel here ********************/
+        multip = (1.0f/(8.0f*lambdaPar));
+        /* calculate gradient vectors for the reference */
+        GradNorm_func2D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors( cudaPeekAtLastError() );
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /* projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy */
+            ProjectVect_func2D_kernel<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                dTVnonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors( cudaPeekAtLastError() );
+            }
+            /* Taking a step towards minus of the gradient */
+            Grad_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_dfunc2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+            else Proj_dfunc2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library; P1_prev serves as scratch here and is refreshed below */
+                dTVResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors( cudaPeekAtLastError() );
+
+                thrust::device_ptr<float> d_resid(P1_prev); /* wrap the raw device pointers so that Thrust reduces them in place */
+                float reduction = sqrt(thrust::transform_reduce(d_resid, d_resid + ImSize, square(), 0.0f, thrust::plus<float>()));
+                thrust::device_ptr<float> d_current(d_update);
+                float reduction2 = sqrt(thrust::transform_reduce(d_current, d_current + ImSize, square(), 0.0f, thrust::plus<float>()));
+
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break; /* tolerance met on more than four iterations (count is never reset) */
+
+                dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors( cudaPeekAtLastError() );
+            }
+
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        if (epsil != 0.0f) cudaFree(d_update_prev);
+        cudaFree(P1);
+        cudaFree(P2);
+        cudaFree(P1_prev);
+        cudaFree(P2_prev);
+        cudaFree(R1);
+        cudaFree(R2);
+
+        cudaFree(d_InputRef);
+        cudaFree(InputRef_x);
+        cudaFree(InputRef_y);
+    }
+    else {
+        /* 3D version */
+        int ImSize = dimX*dimY*dimZ;
+        float *d_input=NULL, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL, *d_InputRef=NULL;
+
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /* allocate space for images on device (d_update_prev only when the tolerance test is enabled) */
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_z,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        if (epsil != 0.0f) checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) ); /* first residual is taken against zeros, not uninitialised memory */
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R3, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_x, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_y, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(InputRef_z, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemcpy(d_update,d_input,ImSize*sizeof(float),cudaMemcpyDeviceToDevice)); /* Output equals Input when no iteration runs */
+        /********************** Run CUDA 3D kernel here ********************/
+        multip = (1.0f/(26.0f*lambdaPar));
+        /* calculate gradient vectors for the reference */
+        GradNorm_func3D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors( cudaPeekAtLastError() );
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /* projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz */
+            ProjectVect_func3D_kernel<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                dTVnonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors( cudaPeekAtLastError() );
+            }
+            /* Taking a step towards minus of the gradient */
+            Grad_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_dfunc3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+            else Proj_dfunc3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library; P1_prev serves as scratch here and is refreshed below */
+                dTVResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors( cudaPeekAtLastError() );
+
+                thrust::device_ptr<float> d_resid(P1_prev); /* wrap the raw device pointers so that Thrust reduces them in place */
+                float reduction = sqrt(thrust::transform_reduce(d_resid, d_resid + ImSize, square(), 0.0f, thrust::plus<float>()));
+                thrust::device_ptr<float> d_current(d_update);
+                float reduction2 = sqrt(thrust::transform_reduce(d_current, d_current + ImSize, square(), 0.0f, thrust::plus<float>()));
+
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break; /* tolerance met on more than four iterations (count is never reset) */
+
+                dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors( cudaPeekAtLastError() );
+            }
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors( cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        if (epsil != 0.0f) cudaFree(d_update_prev);
+        cudaFree(P1);
+        cudaFree(P2);
+        cudaFree(P3);
+        cudaFree(P1_prev);
+        cudaFree(P2_prev);
+        cudaFree(P3_prev);
+        cudaFree(R1);
+        cudaFree(R2);
+        cudaFree(R3);
+        cudaFree(InputRef_x);
+        cudaFree(InputRef_y);
+        cudaFree(InputRef_z);
+        cudaFree(d_InputRef);
+    }
+    cudaDeviceReset(); /* NOTE(review): resets the current device for the whole process - confirm callers keep no other device state */
+}
diff --git a/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
new file mode 100644
index 0000000..b906636
--- /dev/null
+++ b/Core/regularisers_GPU/dTV_FGP_GPU_core.h
@@ -0,0 +1,10 @@
+#ifndef _dTV_FGP_GPU_
+#define _dTV_FGP_GPU_
+/* Declaration of the C-linkage entry point dTV_FGP_GPU_main (FGP-dTV, CUDA) */
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+
+extern "C" void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+#endif /* _dTV_FGP_GPU_ */
diff --git a/Readme.md b/Readme.md
index 056ade5..31d03a1 100644
--- a/Readme.md
+++ b/Readme.md
@@ -18,6 +18,7 @@ can also be used as image denoising iterative filters. The core modules are writ
 2. Fast-Gradient-Projection (FGP) Total Variation [2D/3D GPU/CPU]; (Ref. 2)
 
 ### Multi-channel
+1. Fast-Gradient-Projection (FGP) Directional Total Variation [2D/3D GPU/CPU]; (Ref. 4,2)
 
 ## Installation:
 
@@ -43,6 +44,7 @@ can also be used as image denoising iterative filters. The core modules are writ
 1. Rudin, L.I., Osher, S. and Fatemi, E., 1992. Nonlinear total variation based noise removal algorithms. Physica D: nonlinear phenomena, 60(1-4), pp.259-268.
 2. Beck, A. and Teboulle, M., 2009. Fast gradient-based algorithms for constrained total variation image denoising and deblurring problems. IEEE Transactions on Image Processing, 18(11), pp.2419-2434.
 3. Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+4. Ehrhardt, M.J. and Betcke, M.M., 2016. Multicontrast MRI reconstruction with structure-guided total variation. SIAM Journal on Imaging Sciences, 9(3), pp.1084-1106.
 
 ### License:
 [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
index 71082e7..dc49d9c 100644
--- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m
@@ -1,5 +1,6 @@
 % Volume (3D) denoising demo using CCPi-RGL
-
+clear
+close all
 addpath('../mex_compile/installed');
 addpath('../../../data/');
 
@@ -14,31 +15,65 @@ vol3D(vol3D < 0) = 0;
 figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image');
 
 %%
-fprintf('Denoise using ROF-TV model (CPU) \n');
+fprintf('Denoise a volume using the ROF-TV model (CPU) \n');
 lambda_rof = 0.03; % regularisation parameter
 tau_rof = 0.0025; % time-marching constant 
 iter_rof = 300; % number of ROF iterations
 tic; u_rof = ROF_TV(single(vol3D), lambda_rof, iter_rof, tau_rof); toc; 
 figure; imshow(u_rof(:,:,15), [0 1]); title('ROF-TV denoised volume (CPU)');
 %%
-% fprintf('Denoise using ROF-TV model (GPU) \n');
+% fprintf('Denoise a volume using the ROF-TV model (GPU) \n');
 % lambda_rof = 0.03; % regularisation parameter
 % tau_rof = 0.0025; % time-marching constant 
 % iter_rof = 300; % number of ROF iterations
 % tic; u_rofG = ROF_TV_GPU(single(vol3D), lambda_rof, iter_rof, tau_rof); toc;
 % figure; imshow(u_rofG(:,:,15), [0 1]); title('ROF-TV denoised volume (GPU)');
 %%
-fprintf('Denoise using FGP-TV model (CPU) \n');
+fprintf('Denoise a volume using the FGP-TV model (CPU) \n');
 lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 300; % number of FGP iterations
 epsil_tol =  1.0e-05; % tolerance
 tic; u_fgp = FGP_TV(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; 
 figure; imshow(u_fgp(:,:,15), [0 1]); title('FGP-TV denoised volume (CPU)');
 %%
-% fprintf('Denoise using FGP-TV model (GPU) \n');
+% fprintf('Denoise a volume using the FGP-TV model (GPU) \n');
 % lambda_fgp = 0.03; % regularisation parameter
 % iter_fgp = 300; % number of FGP iterations
 % epsil_tol =  1.0e-05; % tolerance
 % tic; u_fgpG = FGP_TV_GPU(single(vol3D), lambda_fgp, iter_fgp, epsil_tol); toc; 
 % figure; imshow(u_fgpG(:,:,15), [0 1]); title('FGP-TV denoised volume (GPU)');
 %%
+fprintf('Denoise a volume using the FGP-dTV model (CPU) \n');
+
+% create another volume (reference) with slightly less noise
+vol3D_ref = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
+end
+vol3D_ref(vol3D_ref < 0) = 0;
+% vol3D_ref = zeros(size(vol3D),'single'); % pass zero reference (dTV -> TV)
+
+lambda_fgp = 0.03; % regularisation parameter
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv = FGP_dTV(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv(:,:,15), [0 1]); title('FGP-dTV denoised volume (CPU)');
+%%
+fprintf('Denoise a volume using the FGP-dTV model (GPU) \n');
+
+% create another volume (reference) with slightly less noise
+vol3D_ref = zeros(N,N,slices, 'single');
+for i = 1:slices
+vol3D_ref(:,:,i) = Im + .01*randn(size(Im)); 
+end
+vol3D_ref(vol3D_ref < 0) = 0;
+% vol3D_ref = zeros(size(vol3D),'single'); % pass zero reference (dTV -> TV)
+
+lambda_fgp = 0.03; % regularisation parameter
+iter_fgp = 300; % number of FGP iterations
+epsil_tol =  1.0e-05; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv_g = FGP_dTV_GPU(single(vol3D), single(vol3D_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv_g(:,:,15), [0 1]); title('FGP-dTV denoised volume (GPU)');
+%%
+\ No newline at end of file
diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m
index 7f87fbb..145f2ff 100644
--- a/Wrappers/Matlab/demos/demoMatlab_denoise.m
+++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m
@@ -1,5 +1,6 @@
 % Image (2D) denoising demo using CCPi-RGL
-
+clear
+close all
 addpath('../mex_compile/installed');
 addpath('../../../data/');
 
@@ -8,31 +9,55 @@ u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0;
 figure; imshow(u0, [0 1]); title('Noisy image');
 
 %%
-fprintf('Denoise using ROF-TV model (CPU) \n');
+fprintf('Denoise using the ROF-TV model (CPU) \n');
 lambda_rof = 0.03; % regularisation parameter
 tau_rof = 0.0025; % time-marching constant 
 iter_rof = 2000; % number of ROF iterations
 tic; u_rof = ROF_TV(single(u0), lambda_rof, iter_rof, tau_rof); toc; 
 figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)');
 %%
-% fprintf('Denoise using ROF-TV model (GPU) \n');
+% fprintf('Denoise using the ROF-TV model (GPU) \n');
 % lambda_rof = 0.03; % regularisation parameter
 % tau_rof = 0.0025; % time-marching constant 
 % iter_rof = 2000; % number of ROF iterations
 % tic; u_rofG = ROF_TV_GPU(single(u0), lambda_rof, iter_rof, tau_rof); toc;
 % figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)');
 %%
-fprintf('Denoise using FGP-TV model (CPU) \n');
+fprintf('Denoise using the FGP-TV model (CPU) \n');
 lambda_fgp = 0.03; % regularisation parameter
 iter_fgp = 1000; % number of FGP iterations
-epsil_tol =  1.0e-05; % tolerance
+epsil_tol =  1.0e-06; % tolerance
 tic; u_fgp = FGP_TV(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; 
 figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)');
 %%
-% fprintf('Denoise using FGP-TV model (GPU) \n');
+% fprintf('Denoise using the FGP-TV model (GPU) \n');
 % lambda_fgp = 0.03; % regularisation parameter
 % iter_fgp = 1000; % number of FGP iterations
 % epsil_tol =  1.0e-05; % tolerance
 % tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_fgp, iter_fgp, epsil_tol); toc; 
 % figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)');
 %%
+fprintf('Denoise using the FGP-dTV model (CPU) \n');
+% create another image (reference) with slightly less noise
+u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
+% u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+
+lambda_fgp = 0.03; % regularisation parameter
+iter_fgp = 1000; % number of FGP iterations
+epsil_tol =  1.0e-06; % tolerance
+eta =  0.2; % Reference image gradient smoothing constant
+tic; u_fgp_dtv = FGP_dTV(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+figure; imshow(u_fgp_dtv, [0 1]); title('FGP-dTV denoised image (CPU)');
+%%
+% fprintf('Denoise using the FGP-dTV model (GPU) \n');
+% % create another image (reference) with slightly less amount of noise
+% u_ref = Im + .01*randn(size(Im)); u_ref(u_ref < 0) = 0;
+% % u_ref = zeros(size(Im),'single'); % pass zero reference (dTV -> TV)
+% 
+% lambda_fgp = 0.03; % regularisation parameter
+% iter_fgp = 1000; % number of FGP iterations
+% epsil_tol =  1.0e-06; % tolerance
+% eta =  0.2; % Reference image gradient smoothing constant
+% tic; u_fgp_dtvG = FGP_dTV_GPU(single(u0), single(u_ref), lambda_fgp, iter_fgp, epsil_tol, eta); toc; 
+% figure; imshow(u_fgp_dtvG, [0 1]); title('FGP-dTV denoised image (GPU)');
+%%
diff --git a/Wrappers/Matlab/mex_compile/compileCPU_mex.m b/Wrappers/Matlab/mex_compile/compileCPU_mex.m
index 8da81ad..71f345a 100644
--- a/Wrappers/Matlab/mex_compile/compileCPU_mex.m
+++ b/Wrappers/Matlab/mex_compile/compileCPU_mex.m
@@ -11,7 +11,10 @@ movefile ROF_TV.mex* ../installed/
 mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
 movefile FGP_TV.mex* ../installed/
 
-delete ROF_TV_core* FGP_TV_core* utils.c utils.h CCPiDefines.h
+mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile FGP_dTV.mex* ../installed/
+
+delete ROF_TV_core* FGP_TV_core* FGP_dTV_core* utils* CCPiDefines.h
 
 fprintf('%s \n', 'All successfully compiled!');
 
diff --git a/Wrappers/Matlab/mex_compile/compileGPU_mex.m b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
index 45236fa..f58e9bc 100644
--- a/Wrappers/Matlab/mex_compile/compileGPU_mex.m
+++ b/Wrappers/Matlab/mex_compile/compileGPU_mex.m
@@ -23,7 +23,11 @@ movefile ROF_TV_GPU.mex* ../installed/
 mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o
 movefile FGP_TV_GPU.mex* ../installed/
 
-delete TV_ROF_GPU_core* TV_FGP_GPU_core* CCPiDefines.h
+!/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-7.5/include -L/usr/local/cuda-7.5/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
+movefile FGP_dTV_GPU.mex* ../installed/
+
+delete TV_ROF_GPU_core* TV_FGP_GPU_core* dTV_FGP_GPU_core* CCPiDefines.h
 fprintf('%s \n', 'All successfully compiled!');
 
 cd ../../
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
index ba06cc7..aae1cb7 100644
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
@@ -52,7 +52,7 @@ void mexFunction(
     dim_array = mxGetDimensions(prhs[0]);
     
     /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
     
     Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
     lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~ b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~
deleted file mode 100644
index 30d61cd..0000000
--- a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_TV.c~
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * This work is part of the Core Imaging Library developed by
- * Visual Analytics and Imaging System Group of the Science Technology
- * Facilities Council, STFC
- *
- * Copyright 2017 Daniil Kazantsev
- * Copyright 2017 Srikanth Nagella, Edoardo Pasca
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "matrix.h"
-#include "mex.h"
-#include "FGP_TV_core.h"
-
-/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
- *
- * Input Parameters:
- * 1. Noisy image/volume
- * 2. lambdaPar - regularization parameter
- * 3. Number of iterations
- * 4. eplsilon: tolerance constant
- * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
- * 6. nonneg: 'nonnegativity (0 is OFF by default)
- * 7. print information: 0 (off) or 1 (on)
- *
- * Output:
- * [1] Filtered/regularized image
- *
- * This function is based on the Matlab's code and paper by
- * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
- */
-
-
-void mexFunction(
-        int nlhs, mxArray *plhs[],
-        int nrhs, const mxArray *prhs[])
-        
-{
-    int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch;
-    const int  *dim_array;
-    float *Input, *Output, lambda, epsil;
-    
-    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
-    dim_array = mxGetDimensions(prhs[0]);
-    
-    /*Handling Matlab input data*/
-    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
-    
-    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
-    lambda =  (float) mxGetScalar(prhs[1]); /* regularization parameter */
-    iter = 300; /* default iterations number */
-    epsil = 0.0001; /* default tolerance constant */
-    methTV = 0;  /* default isotropic TV penalty */
-    printswitch = 0; /*default print is switched off - 0 */
-    
-    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
-    
-    if ((nrhs == 3) || (nrhs == 4) || (nrhs == 5) || (nrhs == 6))  iter = (int) mxGetScalar(prhs[2]); /* iterations number */
-    if ((nrhs == 4) || (nrhs == 5) || (nrhs == 6))  epsil =  (float) mxGetScalar(prhs[3]); /* tolerance constant */
-    if ((nrhs == 5) || (nrhs == 6))  {
-        char *penalty_type;
-        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
-        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
-        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
-        mxFree(penalty_type);
-    }
-    if (nrhs == 6)  {
-        printswitch = (int) mxGetScalar(prhs[5]);
-        if ((printswitch != 0) || (printswitch != 1)) {mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0"); }
-    }
-    
-    /*Handling Matlab output data*/
-    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
-    
-    if (number_of_dims == 2) {
-        dimZ = 1; /*2D case*/
-        Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
-    }
-    if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
-    
-    
-    TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ)
-}
diff --git a/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
new file mode 100644
index 0000000..bb868c7
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
@@ -0,0 +1,113 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "FGP_dTV_core.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+
+
+/* MEX gateway of the CPU FGP-dTV regulariser: validates the MATLAB arguments, applies defaults and calls dTV_FGP_CPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch, nonneg;
+    /* mxGetDimensions returns const mwSize *; mwSize is not guaranteed to be int */
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
+
+    /* check the argument count before any element of prhs[] is dereferenced */
+    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    /* only 2D images and 3D volumes are handled; other ranks would leave Output == NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /* the reference must have the same rank, otherwise dim_array2[2] would be read out of bounds for a 2D reference */
+    if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("The input and the reference have different number of dimensions");
+
+    /* a dimensions array holds number_of_dims entries, so index 2 is read in the 3D case only */
+    dimX = (int) dim_array[0]; dimY = (int) dim_array[1];
+    dimZ = (number_of_dims == 3) ? (int) dim_array[2] : 1; /* dimZ = 1 is the 2D case */
+    if (number_of_dims == 2) { if ((dim_array[0] != dim_array2[0]) || (dim_array[1] != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
+    if (number_of_dims == 3) { if ((dim_array[0] != dim_array2[0]) || (dim_array[1] != dim_array2[1]) || (dim_array[2] != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    InputRef  = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    eta = 0.01; /* default smoothing constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /* optional arguments; nrhs is already known to lie within [3, 9] */
+    if (nrhs >= 4)  iter = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5)  epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    if (nrhs >= 6)  eta =  (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
+    if (nrhs >= 7)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; never pass that to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 8)  {
+        nonneg = (int) mxGetScalar(prhs[7]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 9)  {
+        printswitch = (int) mxGetScalar(prhs[8]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /* the output has the size of the input; mxGetData is the accessor for non-double (single) data */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    dTV_FGP_CPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+}
+\ No newline at end of file
diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
new file mode 100644
index 0000000..5b80616
--- /dev/null
+++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
@@ -0,0 +1,111 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "dTV_FGP_GPU_core.h"
+
+/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+/* MEX gateway of the CUDA FGP-dTV regulariser: validates the MATLAB arguments, applies defaults and calls dTV_FGP_GPU_main */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    int number_of_dims, iter, dimX, dimY, dimZ, methTV, printswitch, nonneg;
+    /* mxGetDimensions returns const mwSize *; mwSize is not guaranteed to be int */
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
+
+    /* check the argument count before any element of prhs[] is dereferenced */
+    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    /* only 2D images and 3D volumes are handled; other ranks would leave Output == NULL */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /* the reference must have the same rank, otherwise dim_array2[2] would be read out of bounds for a 2D reference */
+    if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("The input and the reference have different number of dimensions");
+
+    /* a dimensions array holds number_of_dims entries, so index 2 is read in the 3D case only */
+    dimX = (int) dim_array[0]; dimY = (int) dim_array[1];
+    dimZ = (number_of_dims == 3) ? (int) dim_array[2] : 1; /* dimZ = 1 is the 2D case */
+    if (number_of_dims == 2) { if ((dim_array[0] != dim_array2[0]) || (dim_array[1] != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
+    if (number_of_dims == 3) { if ((dim_array[0] != dim_array2[0]) || (dim_array[1] != dim_array2[1]) || (dim_array[2] != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}
+
+    Input  = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    InputRef  = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
+    lambda =  (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    eta = 0.01; /* default smoothing constant */
+    methTV = 0;  /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /* optional arguments; nrhs is already known to lie within [3, 9] */
+    if (nrhs >= 4)  iter = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5)  epsil =  (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    if (nrhs >= 6)  eta =  (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
+    if (nrhs >= 7)  {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; never pass that to strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0)  methTV = 1;  /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 8)  {
+        nonneg = (int) mxGetScalar(prhs[7]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 9)  {
+        printswitch = (int) mxGetScalar(prhs[8]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /* the output has the size of the input; mxGetData is the accessor for non-double (single) data */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    dTV_FGP_GPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+}
+\ No newline at end of file
diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py
index 039daab..376cc9c 100644
--- a/Wrappers/Python/ccpi/filters/regularisers.py
+++ b/Wrappers/Python/ccpi/filters/regularisers.py
@@ -2,8 +2,8 @@
 script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
 """
 
-from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU
-from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU
+from ccpi.filters.cpu_regularisers_cython import TV_ROF_CPU, TV_FGP_CPU, dTV_FGP_CPU
+from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, dTV_FGP_GPU
 
 def ROF_TV(inputData, regularisation_parameter, iterations,
                      time_marching_parameter,device='cpu'):
@@ -42,3 +42,28 @@ def FGP_TV(inputData, regularisation_parameter,iterations,
     else:
         raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
                          .format(device))
+def FGP_dTV(inputData, refdata, regularisation_parameter, iterations,
+                     tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'):
+    """Run the FGP-dTV regulariser on the requested device.
+
+    Every argument before `device` is forwarded unchanged, in order, to
+    dTV_FGP_CPU (device == 'cpu') or dTV_FGP_GPU (device == 'gpu').
+
+    Raises ValueError when `device` is neither 'cpu' nor 'gpu'.
+    """
+    if device == 'cpu':
+        core = dTV_FGP_CPU
+    elif device == 'gpu':
+        core = dTV_FGP_GPU
+    else:
+        raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+                         .format(device))
+    return core(inputData,
+                refdata,
+                regularisation_parameter,
+                iterations,
+                tolerance_param,
+                eta_const,
+                methodTV,
+                nonneg,
+                printM)
diff --git a/Wrappers/Python/test/run_test.py b/Wrappers/Python/conda-recipe/run_test.py.in
index 04bbd40..9a6f4de 100644
--- a/Wrappers/Python/test/run_test.py
+++ b/Wrappers/Python/conda-recipe/run_test.py.in
@@ -1,8 +1,6 @@
 import unittest
 import numpy as np
-import os
 from ccpi.filters.regularisers import ROF_TV, FGP_TV
-import matplotlib.pyplot as plt
 
 def rmse(im1, im2):
     rmse = np.sqrt(np.sum((im1 - im2) ** 2) / float(im1.size))
@@ -14,13 +12,16 @@ class TestRegularisers(unittest.TestCase):
         pass
 
     def test_cpu_regularisers(self):
-        filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
         
+        Im = np.load('testLena.npy');
+        """
         # read noiseless image
         Im = plt.imread(filename)
         Im = np.asarray(Im, dtype='float32')
 
         Im = Im/255
+        """
         tolerance = 1e-05
         rms_rof_exp = 0.006812507 #expected value for ROF model
         rms_fgp_exp = 0.019152347 #expected value for FGP model
@@ -80,13 +81,11 @@ class TestRegularisers(unittest.TestCase):
         """
         self.assertTrue(res)
     def test_gpu_regularisers(self):
-        filename = os.path.join(".." , ".." , ".." , "data" ,"lena_gray_512.tif")
+        #filename = os.path.join(".." , ".." , ".." , "data" ,"testLena.npy")
         
-        # read noiseless image
-        Im = plt.imread(filename)
-        Im = np.asarray(Im, dtype='float32')
+        Im = np.load('testLena.npy');
 
-        Im = Im/255
+        #Im = Im/255
         tolerance = 1e-05
         rms_rof_exp = 0.006812507 #expected value for ROF model
         rms_fgp_exp = 0.019152347 #expected value for FGP model
@@ -146,4 +145,4 @@ class TestRegularisers(unittest.TestCase):
         """
         self.assertTrue(res)
 if __name__ == '__main__':
-    unittest.main()
-\ No newline at end of file
+    unittest.main()
diff --git a/Wrappers/Python/conda-recipe/testLena.npy b/Wrappers/Python/conda-recipe/testLena.npy
new file mode 100644
index 0000000..14bc0e3
--- /dev/null
+++ b/Wrappers/Python/conda-recipe/testLena.npy
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py
index 929f0af..00beb0b 100644
--- a/Wrappers/Python/demos/demo_cpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -22,6 +22,8 @@ def printParametersToString(pars):
                 txt += "{0} = {1}".format(key, value.__name__)
             elif key == 'input':
                 txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
             else:
                 txt += "{0} = {1}".format(key, value)
             txt += '\n'
@@ -39,9 +41,14 @@ perc = 0.05
 u0 = Im + np.random.normal(loc = 0 ,
                                   scale = perc * Im , 
                                   size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+ 
 # map the u0 u0->u0>0
 # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
 u0 = u0.astype('float32')
+u_ref = u_ref.astype('float32')
 
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
@@ -134,6 +141,61 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_cpu, cmap="gray")
 plt.title('{}'.format('CPU results'))
 
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_____________FGP-dTV (2D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dtv_cpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+rms = rmse(Im, fgp_dtv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+
+
 # Uncomment to test 3D regularisation performance 
 #%%
 """
@@ -148,10 +210,12 @@ Im = Im/255
 perc = 0.05
 
 noisyVol = np.zeros((slices,N,N),dtype='float32')
+noisyRef = np.zeros((slices,N,N),dtype='float32')
 idealVol = np.zeros((slices,N,N),dtype='float32')
 
 for i in range (slices):
     noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
     idealVol[i,:,:] = Im
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
@@ -159,7 +223,7 @@ print ("_______________ROF-TV (3D)_________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure(4)
 plt.suptitle('Performance of ROF-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy 15th slice of a volume')
@@ -199,7 +263,7 @@ print ("_______________FGP-TV (3D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure(5)
 plt.suptitle('Performance of FGP-TV regulariser using the CPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -242,5 +306,59 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
          verticalalignment='top', bbox=props)
 imgplot = plt.imshow(fgp_cpu3D[10,:,:], cmap="gray")
 plt.title('{}'.format('Recovered volume on the CPU using FGP-TV'))
+
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-dTV (3D)__________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(6)
+plt.suptitle('Performance of FGP-dTV regulariser using the CPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV,\
+        'input' : noisyVol,\
+        'refdata' : noisyRef,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dTV_cpu3D = FGP_dTV(pars['input'],
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'],
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+             
+rms = rmse(idealVol, fgp_dTV_cpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dTV_cpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the CPU using FGP-dTV'))
 """
-#%%
-\ No newline at end of file
+#%%
diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
index cfe2e7d..310cf75 100644
--- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -22,6 +22,8 @@ def printParametersToString(pars):
                 txt += "{0} = {1}".format(key, value.__name__)
             elif key == 'input':
                 txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
             else:
                 txt += "{0} = {1}".format(key, value)
             txt += '\n'
@@ -39,10 +41,14 @@ perc = 0.05
 u0 = Im + np.random.normal(loc = 0 ,
                                   scale = perc * Im , 
                                   size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
+
 # map the u0 u0->u0>0
 # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
 u0 = u0.astype('float32')
-
+u_ref = u_ref.astype('float32')
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________ROF-TV bench___________________")
@@ -213,3 +219,96 @@ else:
     print ("Arrays match")
 
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Comparison of FGP-dTV regulariser using CPU and GPU implementations')
+a=fig.add_subplot(1,4,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+        
+print ("#############FGP dTV CPU####################")
+start_time = timeit.default_timer()
+fgp_dtv_cpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'cpu')
+             
+             
+rms = rmse(Im, fgp_dtv_cpu)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_cpu, cmap="gray")
+plt.title('{}'.format('CPU results'))
+
+print ("##############FGP dTV GPU##################")
+start_time = timeit.default_timer()
+fgp_dtv_gpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+rms = rmse(Im, fgp_dtv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_dTV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,4,3)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
+
+print ("--------Compare the results--------")
+tolerance = 1e-05
+# Flag every pixel where the CPU and GPU outputs differ by more than the tolerance.
+diff_im = np.abs(fgp_dtv_cpu - fgp_dtv_gpu)
+diff_im[diff_im > tolerance] = 1
+a=fig.add_subplot(1,4,4)
+imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray")
+plt.title('{}'.format('Pixels larger threshold difference'))
+if (diff_im.sum() > 1):
+    print ("Arrays do not match!")
+else:
+    print ("Arrays match")
diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py
index c496e1c..24a3c88 100644
--- a/Wrappers/Python/demos/demo_gpu_regularisers.py
+++ b/Wrappers/Python/demos/demo_gpu_regularisers.py
@@ -12,7 +12,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import os
 import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, FGP_dTV
 from qualitymetrics import rmse
 ###############################################################################
 def printParametersToString(pars):
@@ -22,6 +22,8 @@ def printParametersToString(pars):
                 txt += "{0} = {1}".format(key, value.__name__)
             elif key == 'input':
                 txt += "{0} = {1}".format(key, np.shape(value))
+            elif key == 'refdata':
+                txt += "{0} = {1}".format(key, np.shape(value))
             else:
                 txt += "{0} = {1}".format(key, value)
             txt += '\n'
@@ -39,10 +41,13 @@ perc = 0.05
 u0 = Im + np.random.normal(loc = 0 ,
                                   scale = perc * Im , 
                                   size = np.shape(Im))
+u_ref = Im + np.random.normal(loc = 0 ,
+                                  scale = 0.01 * Im , 
+                                  size = np.shape(Im))
 # map the u0 u0->u0>0
 # f = np.frompyfunc(lambda x: 0 if x < 0 else x, 1,1)
 u0 = u0.astype('float32')
-
+u_ref = u_ref.astype('float32')
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 print ("____________ROF-TV bench___________________")
@@ -134,6 +139,58 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
 imgplot = plt.imshow(fgp_gpu, cmap="gray")
 plt.title('{}'.format('GPU results'))
 
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("____________FGP-dTV bench___________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(3)
+plt.suptitle('Performance of the FGP-dTV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(u0,cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : u0,\
+        'refdata' : u_ref,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :2000 ,\
+        'tolerance_constant':1e-06,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("##############FGP dTV GPU##################")
+start_time = timeit.default_timer()
+fgp_dtv_gpu = FGP_dTV(pars['input'], 
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'], 
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+                                   
+rms = rmse(Im, fgp_dtv_gpu)
+pars['rmse'] = rms
+pars['algorithm'] = FGP_dTV
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dtv_gpu, cmap="gray")
+plt.title('{}'.format('GPU results'))
+
 
 # Uncomment to test 3D regularisation performance 
 #%%
@@ -149,10 +206,12 @@ Im = Im/255
 perc = 0.05
 
 noisyVol = np.zeros((slices,N,N),dtype='float32')
+noisyRef = np.zeros((slices,N,N),dtype='float32')
 idealVol = np.zeros((slices,N,N),dtype='float32')
 
 for i in range (slices):
     noisyVol[i,:,:] = Im + np.random.normal(loc = 0 , scale = perc * Im , size = np.shape(Im))
+    noisyRef[i,:,:] = Im + np.random.normal(loc = 0 , scale = 0.01 * Im , size = np.shape(Im))
     idealVol[i,:,:] = Im
 
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
@@ -160,7 +219,7 @@ print ("_______________ROF-TV (3D)_________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(3)
+fig = plt.figure(4)
 plt.suptitle('Performance of ROF-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy 15th slice of a volume')
@@ -200,7 +259,7 @@ print ("_______________FGP-TV (3D)__________________")
 print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
 
 ## plot 
-fig = plt.figure(4)
+fig = plt.figure(5)
 plt.suptitle('Performance of FGP-TV regulariser using the GPU')
 a=fig.add_subplot(1,2,1)
 a.set_title('Noisy Image')
@@ -242,6 +301,58 @@ a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
          verticalalignment='top', bbox=props)
 imgplot = plt.imshow(fgp_gpu3D[10,:,:], cmap="gray")
 plt.title('{}'.format('Recovered volume on the GPU using FGP-TV'))
-#%%
-"""
 
+
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+print ("_______________FGP-dTV (3D)________________")
+print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
+
+## plot 
+fig = plt.figure(6)
+plt.suptitle('Performance of FGP-dTV regulariser using the GPU')
+a=fig.add_subplot(1,2,1)
+a.set_title('Noisy Image')
+imgplot = plt.imshow(noisyVol[10,:,:],cmap="gray")
+
+# set parameters
+pars = {'algorithm' : FGP_dTV, \
+        'input' : noisyVol,\
+        'refdata' : noisyRef,\
+        'regularisation_parameter':0.04, \
+        'number_of_iterations' :300 ,\
+        'tolerance_constant':0.00001,\
+        'eta_const':0.2,\
+        'methodTV': 0 ,\
+        'nonneg': 0 ,\
+        'printingOut': 0 
+        }
+
+print ("#############FGP dTV GPU####################")
+start_time = timeit.default_timer()
+fgp_dTV_gpu3D = FGP_dTV(pars['input'],
+              pars['refdata'], 
+              pars['regularisation_parameter'],
+              pars['number_of_iterations'],
+              pars['tolerance_constant'], 
+              pars['eta_const'],
+              pars['methodTV'],
+              pars['nonneg'],
+              pars['printingOut'],'gpu')
+
+rms = rmse(idealVol, fgp_dTV_gpu3D)
+pars['rmse'] = rms
+
+txtstr = printParametersToString(pars)
+txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time)
+print (txtstr)
+a=fig.add_subplot(1,2,2)
+
+# these are matplotlib.patch.Patch properties
+props = dict(boxstyle='round', facecolor='wheat', alpha=0.75)
+# place a text box in upper left in axes coords
+a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14,
+         verticalalignment='top', bbox=props)
+imgplot = plt.imshow(fgp_dTV_gpu3D[10,:,:], cmap="gray")
+plt.title('{}'.format('Recovered volume on the GPU using FGP-dTV'))
+"""
+#%%
diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in
index a1c1ab6..c7ebb5c 100644
--- a/Wrappers/Python/setup-regularisers.py.in
+++ b/Wrappers/Python/setup-regularisers.py.in
@@ -36,6 +36,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"),
                        os.path.join(".." , ".." , "Core",  "regularisers_CPU"),
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_FGP" ) , 
                        os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "TV_ROF" ) , 
+                       os.path.join(".." , ".." , "Core",  "regularisers_GPU" , "dTV_FGP" ) , 
 						   "."]
 
 if platform.system() == 'Windows':				   
diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx
index 0f08f7f..1661375 100644
--- a/Wrappers/Python/src/cpu_regularisers.pyx
+++ b/Wrappers/Python/src/cpu_regularisers.pyx
@@ -20,6 +20,7 @@ cimport numpy as np
 
 cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
 cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
 
 
 #****************************************************************#
@@ -89,7 +90,7 @@ def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
     cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
             np.zeros([dims[0],dims[1]], dtype='float32')
                    
-    #/* Run ROF iterations for 2D data */
+    #/* Run FGP-TV iterations for 2D data */
     TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0], regularisation_parameter, 
                        iterationsNumb, 
                        tolerance_param,
@@ -115,7 +116,7 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
     cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
             np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
            
-    #/* Run ROF iterations for 3D data */
+    #/* Run FGP-TV iterations for 3D data */
     TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter,
                        iterationsNumb, 
                        tolerance_param,
@@ -124,3 +125,69 @@ def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        printM,
                        dims[2], dims[1], dims[0])
     return outputData 
+#****************************************************************#
+#**************Directional Total-variation FGP ******************#
+#****************************************************************#
+#******** Directional TV Fast-Gradient-Projection (FGP)*********#
+def dTV_FGP_CPU(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM):
+    """Run FGP-dTV on the CPU via dTV_FGP_2D or dTV_FGP_3D, chosen by inputData.ndim; raises ValueError for any other rank."""
+    if inputData.ndim not in (2, 3):
+        raise ValueError('inputData must be 2D or 3D, got {0}D'.format(inputData.ndim))
+    return (dTV_FGP_2D if inputData.ndim == 2 else dTV_FGP_3D)(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM)
+
+def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
+               float regularisation_parameter, int iterationsNumb,
+               float tolerance_param, float eta_const,
+               int methodTV, int nonneg, int printM):
+    """Run the CPU FGP-dTV regulariser on a 2D float32 C-contiguous image.
+
+    refdata is the reference image and must have the same shape as inputData.
+    Remaining arguments are forwarded unchanged to dTV_FGP_CPU_main.
+    Returns a new float32 array of the same shape as inputData.
+    Raises ValueError if the two shapes differ.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    # Only inputData's size reaches the C routine, so a smaller refdata
+    # could be read past its end.
+    if refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1]:
+        raise ValueError('refdata must have the same shape as inputData')
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+
+    # dimX is the contiguous (last) axis, matching the dims[2], dims[1], dims[0] order used by dTV_FGP_3D.
+    dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
+                     regularisation_parameter, iterationsNumb, tolerance_param,
+                     eta_const, methodTV, nonneg, printM, dims[1], dims[0], 1)
+    return outputData
+            
+def dTV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
+               float regularisation_parameter, int iterationsNumb,
+               float tolerance_param, float eta_const,
+               int methodTV, int nonneg, int printM):
+    """Run the CPU FGP-dTV regulariser on a 3D float32 C-contiguous volume.
+
+    refdata is the reference volume and must have the same shape as inputData.
+    Remaining arguments are forwarded unchanged to dTV_FGP_CPU_main.
+    Returns a new float32 array of the same shape as inputData.
+    Raises ValueError if the two shapes differ.
+    """
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    # Only inputData's size reaches the C routine, so a smaller refdata could be read past its end.
+    if refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1] or refdata.shape[2] != dims[2]:
+        raise ValueError('refdata must have the same shape as inputData')
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
+    # Axes are passed last-first as dimX, dimY, dimZ.
+    dTV_FGP_CPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
+                     regularisation_parameter, iterationsNumb, tolerance_param,
+                     eta_const, methodTV, nonneg, printM, dims[2], dims[1], dims[0])
+    return outputData
diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx
index ea746d3..18efdcd 100644
--- a/Wrappers/Python/src/gpu_regularisers.pyx
+++ b/Wrappers/Python/src/gpu_regularisers.pyx
@@ -20,6 +20,7 @@ cimport numpy as np
 
 cdef extern void TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
 cdef extern void TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
 
 # Total-variation Rudin-Osher-Fatemi (ROF)
 def TV_ROF_GPU(inputData,
@@ -61,7 +62,36 @@ def TV_FGP_GPU(inputData,
                      methodTV,
                      nonneg,
                      printM)
-                     
+# Directional Total-variation Fast-Gradient-Projection (FGP)
+def dTV_FGP_GPU(inputData,
+                     refdata,
+                     regularisation_parameter,
+                     iterations, 
+                     tolerance_param,
+                     eta_const,
+                     methodTV,
+                     nonneg,
+                     printM):
+    """Dispatch FGP-dTV on the GPU according to the rank of inputData.
+
+    2D input goes to FGPdTV2D and 3D input to FGPdTV3D; every argument is
+    forwarded unchanged and the callee's result is returned.
+
+    Raises ValueError when inputData is neither 2D nor 3D (this used to
+    fall through and silently return None).
+    """
+    if inputData.ndim == 2:
+        run = FGPdTV2D
+    elif inputData.ndim == 3:
+        run = FGPdTV3D
+    else:
+        raise ValueError('inputData must be 2D or 3D, got {0}D'
+                         .format(inputData.ndim))
+
+    # Both implementations take the same positional arguments.
+    return run(inputData, refdata, regularisation_parameter,
+               iterations, tolerance_param, eta_const,
+               methodTV, nonneg, printM)
 #****************************************************************#
 #********************** Total-variation ROF *********************#
 #****************************************************************#
@@ -157,8 +187,7 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
 		    np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
           
     # Running CUDA code here    
-    TV_FGP_GPU_main(            
-            &inputData[0,0,0], &outputData[0,0,0], 
+    TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], 
                        regularisation_parameter , 
                        iterations, 
                        tolerance_param,
@@ -167,4 +196,68 @@ def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
                        printM,
                        dims[2], dims[1], dims[0]);   
      
-    return outputData    
+    return outputData 
+    
+#****************************************************************#
+#**************Directional Total-variation FGP ******************#
+#****************************************************************#
+#******** Directional TV Fast-Gradient-Projection (FGP)*********#
+def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+             np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
+             float regularisation_parameter, int iterations,
+             float tolerance_param, float eta_const,
+             int methodTV, int nonneg, int printM):
+    """Run the GPU FGP-dTV regulariser on a 2D float32 C-contiguous image.
+
+    refdata is the reference image and must have the same shape as inputData.
+    Remaining arguments are forwarded unchanged to dTV_FGP_GPU_main.
+    Returns a new float32 array of the same shape as inputData.
+    Raises ValueError if the two shapes differ.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    # Only inputData's size reaches the CUDA routine, so a smaller refdata
+    # could be read past its end.
+    if refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1]:
+        raise ValueError('refdata must have the same shape as inputData')
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+
+    # N is the contiguous (last) axis, matching the dims[2], dims[1], dims[0] order used by FGPdTV3D.
+    dTV_FGP_GPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0],
+                     regularisation_parameter, iterations, tolerance_param,
+                     eta_const, methodTV, nonneg, printM, dims[1], dims[0], 1)
+
+    return outputData
+    
+def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+             np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
+             float regularisation_parameter, int iterations,
+             float tolerance_param, float eta_const,
+             int methodTV, int nonneg, int printM):
+    """Run the GPU FGP-dTV regulariser on a 3D float32 C-contiguous volume.
+
+    refdata is the reference volume and must have the same shape as inputData.
+    Remaining arguments are forwarded unchanged to dTV_FGP_GPU_main.
+    Returns a new float32 array of the same shape as inputData.
+    Raises ValueError if the two shapes differ.
+    """
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+    # Only inputData's size reaches the CUDA routine, so a smaller refdata
+    # could be read past its end.
+    if refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1] or refdata.shape[2] != dims[2]:
+        raise ValueError('refdata must have the same shape as inputData')
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+    # Axes are passed last-first as N, M, Z.
+    dTV_FGP_GPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
+                     regularisation_parameter, iterations, tolerance_param,
+                     eta_const, methodTV, nonneg, printM, dims[2], dims[1], dims[0])
+    return outputData
diff --git a/Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc b/Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc
deleted file mode 100644
index 2196a53..0000000
--- a/Wrappers/Python/test/__pycache__/metrics.cpython-35.pyc
+++ /dev/null
author	Daniil Kazantsev <dkazanc3@googlemail.com>	2018-04-12 12:09:38 +0100
committer	GitHub <noreply@github.com>	2018-04-12 12:09:38 +0100
commit	7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69 (patch)
tree	225dcf0db9dc7e0f0fc5fc001a7efb14c19658f8
parent	aa99eb8a9bd47ecd6e4d3d1e8c9f0cfbefb4f7bb (diff)
parent	22f6e22cbe6db04c6bbe8d259ce761e3748d7102 (diff)
download	regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.tar.gz regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.tar.bz2 regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.tar.xz regularization-7ae26b005c5f3d9ca0181ab1cf06b6ee8df5ed69.zip