summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt20
-rw-r--r--src/Core/CCPiDefines.h35
-rw-r--r--src/Core/CMakeLists.txt151
-rw-r--r--src/Core/inpainters_CPU/Diffusion_Inpaint_core.c322
-rw-r--r--src/Core/inpainters_CPU/Diffusion_Inpaint_core.h61
-rw-r--r--src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c188
-rw-r--r--src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h54
-rw-r--r--src/Core/regularisers_CPU/Diffus4th_order_core.c250
-rw-r--r--src/Core/regularisers_CPU/Diffus4th_order_core.h55
-rw-r--r--src/Core/regularisers_CPU/Diffusion_core.c307
-rw-r--r--src/Core/regularisers_CPU/Diffusion_core.h59
-rw-r--r--src/Core/regularisers_CPU/FGP_TV_core.c321
-rw-r--r--src/Core/regularisers_CPU/FGP_TV_core.h63
-rw-r--r--src/Core/regularisers_CPU/FGP_dTV_core.c441
-rw-r--r--src/Core/regularisers_CPU/FGP_dTV_core.h72
-rw-r--r--src/Core/regularisers_CPU/LLT_ROF_core.c410
-rw-r--r--src/Core/regularisers_CPU/LLT_ROF_core.h65
-rw-r--r--src/Core/regularisers_CPU/Nonlocal_TV_core.c173
-rw-r--r--src/Core/regularisers_CPU/Nonlocal_TV_core.h61
-rw-r--r--src/Core/regularisers_CPU/PatchSelect_core.c345
-rw-r--r--src/Core/regularisers_CPU/PatchSelect_core.h63
-rw-r--r--src/Core/regularisers_CPU/ROF_TV_core.c289
-rw-r--r--src/Core/regularisers_CPU/ROF_TV_core.h57
-rwxr-xr-xsrc/Core/regularisers_CPU/SB_TV_core.c368
-rw-r--r--src/Core/regularisers_CPU/SB_TV_core.h61
-rw-r--r--src/Core/regularisers_CPU/TGV_core.c532
-rw-r--r--src/Core/regularisers_CPU/TGV_core.h73
-rwxr-xr-xsrc/Core/regularisers_CPU/TNV_core.c452
-rw-r--r--src/Core/regularisers_CPU/TNV_core.h47
-rw-r--r--src/Core/regularisers_CPU/utils.c117
-rw-r--r--src/Core/regularisers_CPU/utils.h34
-rw-r--r--src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu268
-rw-r--r--src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h8
-rw-r--r--src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu473
-rw-r--r--src/Core/regularisers_GPU/LLT_ROF_GPU_core.h8
-rw-r--r--src/Core/regularisers_GPU/NonlDiff_GPU_core.cu345
-rw-r--r--src/Core/regularisers_GPU/NonlDiff_GPU_core.h8
-rw-r--r--src/Core/regularisers_GPU/PatchSelect_GPU_core.cu460
-rw-r--r--src/Core/regularisers_GPU/PatchSelect_GPU_core.h8
-rw-r--r--src/Core/regularisers_GPU/TGV_GPU_core.cu728
-rw-r--r--src/Core/regularisers_GPU/TGV_GPU_core.h8
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_FGP_GPU_core.cu564
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_FGP_GPU_core.h9
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_ROF_GPU_core.cu358
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_ROF_GPU_core.h8
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_SB_GPU_core.cu552
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_SB_GPU_core.h10
-rw-r--r--src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu741
-rw-r--r--src/Core/regularisers_GPU/dTV_FGP_GPU_core.h9
-rw-r--r--src/Core/regularisers_GPU/shared.h42
-rwxr-xr-xsrc/Matlab/CMakeLists.txt147
-rw-r--r--src/Matlab/mex_compile/compileCPU_mex_Linux.m81
-rw-r--r--src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m135
-rw-r--r--src/Matlab/mex_compile/compileGPU_mex.m74
-rw-r--r--src/Matlab/mex_compile/installed/MEXed_files_location.txt0
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c77
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c97
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c114
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c82
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c89
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c103
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c84
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c88
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c92
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c77
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/SB_TV.c91
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/TGV.c83
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/TNV.c74
-rw-r--r--src/Matlab/mex_compile/regularisers_CPU/TV_energy.c72
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp77
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp97
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp113
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp83
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp92
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp74
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp91
-rw-r--r--src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp81
-rw-r--r--src/Matlab/supp/RMSE.m7
-rw-r--r--src/Matlab/supp/my_red_yellowMAP.matbin0 -> 1761 bytes
-rw-r--r--src/Python/CMakeLists.txt141
-rw-r--r--src/Python/ccpi/__init__.py0
-rw-r--r--src/Python/ccpi/filters/__init__.py0
-rw-r--r--src/Python/ccpi/filters/regularisers.py214
-rw-r--r--src/Python/setup-regularisers.py.in75
-rw-r--r--src/Python/src/cpu_regularisers.pyx685
-rw-r--r--src/Python/src/gpu_regularisers.pyx640
86 files changed, 14283 insertions, 0 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..5fe1a57
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright 2017 Edoardo Pasca
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# The Core directory is added unconditionally.
+add_subdirectory(Core)
+# The Matlab and Python wrapper directories are added only when the
+# corresponding BUILD_*_WRAPPER variable evaluates to true.
+if (BUILD_MATLAB_WRAPPER)
+ add_subdirectory(Matlab)
+endif()
+if (BUILD_PYTHON_WRAPPER)
+ add_subdirectory(Python)
+endif() \ No newline at end of file
diff --git a/src/Core/CCPiDefines.h b/src/Core/CCPiDefines.h
new file mode 100644
index 0000000..d3038f9
--- /dev/null
+++ b/src/Core/CCPiDefines.h
@@ -0,0 +1,35 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Srikanth Nagella, Edoardo Pasca, Daniil Kazantsev
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef CCPIDEFINES_H
+#define CCPIDEFINES_H
+
+/*
+ * Symbol visibility macros used by the public headers of the library.
+ *
+ * CCPI_EXPORT     - placed in front of every exported function declaration.
+ * EXPIMP_TEMPLATE - empty while the library itself is built, 'extern' for
+ *                   consumers on Windows, empty everywhere else.
+ *
+ * On Windows the macros expand to dllexport while one of the *_EXPORTS
+ * macros is defined (the build scripts pass /DCCPiCore_EXPORTS) and to
+ * dllimport otherwise. Every other platform gets empty definitions, so that
+ * headers using CCPI_EXPORT compile on any non-Windows system and not only on
+ * the ones that define linux/__linux/__APPLE__.
+ */
+#if defined(_WIN32) || defined(__WIN32__)
+    #if defined(CCPiCore_EXPORTS) || defined(CCPiNexusWidget_EXPORTS) || defined(ContourTreeSegmentation_EXPORTS) || defined(ContourTree_EXPORTS) /* added by CMake */
+        #define CCPI_EXPORT __declspec(dllexport)
+        #define EXPIMP_TEMPLATE
+    #else
+        #define CCPI_EXPORT __declspec(dllimport)
+        #define EXPIMP_TEMPLATE extern
+    #endif /* *_EXPORTS */
+#else
+    /* Linux, macOS and any other non-Windows platform: no decoration needed */
+    #define CCPI_EXPORT
+    #define EXPIMP_TEMPLATE
+#endif
+
+#endif
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
new file mode 100644
index 0000000..b3c0dfb
--- /dev/null
+++ b/src/Core/CMakeLists.txt
@@ -0,0 +1,151 @@
+# Copyright 2018 Edoardo Pasca
+#cmake_minimum_required (VERSION 3.0)
+
+project(RGL_core)
+#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
+
+# The version number.
+# It is read from the CIL_VERSION environment variable at configure time and
+# force-written into the cache as an INTERNAL entry.
+
+set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
+
+# conda orchestrated build
+# NOTE(review): presumably the conda recipe exports CIL_VERSION; when it is
+# not set the value printed below is empty - confirm against the recipe.
+message("CIL_VERSION ${CIL_VERSION}")
+#include (GenerateExportHeader)
+
+
+# Enable OpenMP when the toolchain provides it: the compile flags are added
+# for C and C++, and the same flag is appended to the link lines of
+# executables and shared libraries.
+find_package(OpenMP)
+if (OPENMP_FOUND)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+    # Extend the shared-library flags from their own previous value; building
+    # them from CMAKE_EXE_LINKER_FLAGS would silently drop anything the user
+    # or the toolchain file had put into CMAKE_SHARED_LINKER_FLAGS.
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+    # CMAKE_STATIC_LINKER_FLAGS is intentionally left untouched: it is handed
+    # to the archiver, which does not understand compiler options such as the
+    # OpenMP flag.
+endif()
+
+## Build the regularisers package as a library
+message("Creating Regularisers as a shared library")
+
+# Report the flags collected so far to ease debugging of the configuration.
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+message("CMAKE_C_FLAGS ${CMAKE_C_FLAGS}")
+message("CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}")
+message("CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}")
+message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}")
+
+# Default to a Release build, but keep a build type that was selected
+# explicitly (e.g. -DCMAKE_BUILD_TYPE=Debug) instead of overwriting it.
+if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+# Platform specific compiler flags and extra link libraries.
+# FLAGS is appended once to the C++ flags and once to the C flags; each
+# variable is extended from its own previous value so that the C flags keep
+# what was configured for C (including OpenMP_C_FLAGS) and do not receive
+# FLAGS twice.
+if(WIN32)
+    set (FLAGS "/DWIN32 /EHsc /DCCPiCore_EXPORTS /openmp")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS}")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT.lib")
+
+    set (EXTRA_LIBRARIES)
+
+    message("library lib: ${LIBRARY_LIB}")
+
+elseif(UNIX)
+    # NOTE(review): "-Wl,--no-undefined" and the hard-coded "gomp" library
+    # look GNU specific - confirm they are accepted by the macOS toolchain.
+    set (FLAGS "-O2 -funsigned-char -Wall -Wl,--no-undefined -DCCPiReconstructionIterative_EXPORTS ")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAGS}")
+
+    set (EXTRA_LIBRARIES
+        "gomp"
+        "m"
+        )
+
+endif()
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+
+## Build the regularisers package as a library
+message("Adding regularisers as a shared library")
+
+#set(CMAKE_C_COMPILER /apps/pgi/linux86-64/17.4/bin/pgcc)
+#set(CMAKE_C_FLAGS "-acc -Minfo -ta=tesla:cc20 -openmp")
+#set(CMAKE_C_FLAGS "-acc -Minfo -ta=multicore -openmp -fPIC")
+
+# CPU library: all regularisers, the two inpainters and the shared utilities.
+add_library(cilreg SHARED
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_TV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/SB_TV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TGV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffusion_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Diffus4th_order_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/LLT_ROF_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/ROF_TV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/FGP_dTV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/TNV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/Nonlocal_TV_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/PatchSelect_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/utils.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/Diffusion_Inpaint_core.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/NonlocalMarching_Inpaint_core.c
+    )
+target_link_libraries(cilreg ${EXTRA_LIBRARIES} )
+# include_directories() takes directories only; the former leading
+# "cilreg PUBLIC" arguments were interpreted as two (non-existent) include
+# directories. The directory-scoped command is kept on purpose so that every
+# target defined later in this directory sees the same search path.
+include_directories(
+    ${LIBRARY_INC}/include
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_CPU/
+    ${CMAKE_CURRENT_SOURCE_DIR}/inpainters_CPU/ )
+
+## Install
+# Installation rules for the CPU library 'cilreg'.
+
+# UNIX: the LIBRARY artifact of the target is installed into <prefix>/lib.
+if (UNIX)
+message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
+install(TARGETS cilreg
+ LIBRARY DESTINATION lib
+ CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+ )
+# WIN32: the RUNTIME artifact goes to <prefix>/bin, the ARCHIVE artifact to <prefix>/lib.
+# NOTE(review): per the install() documentation a CONFIGURATIONS option placed
+# after an artifact kind applies to that kind only (here ARCHIVE) - confirm
+# that this is the intended behaviour.
+elseif(WIN32)
+message ("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
+ install(TARGETS cilreg
+ RUNTIME DESTINATION bin
+ ARCHIVE DESTINATION lib
+ CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+ )
+endif()
+
+
+
+# GPU Regularisers
+# Built only when BUILD_CUDA is true and find_package(CUDA) succeeds; the
+# result is a second shared library, 'cilregcuda', made of the .cu sources.
+if (BUILD_CUDA)
+ find_package(CUDA)
+ if (CUDA_FOUND)
+ # NOTE(review): CUDA_NVCC_FLAGS is documented as a list and -fPIC/-shared
+ # look GCC specific - confirm this string form works on every supported
+ # platform (in particular Windows).
+ set(CUDA_NVCC_FLAGS "-Xcompiler -fPIC -shared -D_FORCE_INLINES")
+ message("CUDA FLAGS ${CUDA_NVCC_FLAGS}")
+ CUDA_ADD_LIBRARY(cilregcuda SHARED
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_ROF_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_FGP_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TV_SB_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/LLT_ROF_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/TGV_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/NonlDiff_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/Diffus_4thO_GPU_core.cu
+ ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/PatchSelect_GPU_core.cu
+ )
+ # Same install layout as used for the CPU library: lib on UNIX, bin + lib on WIN32.
+ if (UNIX)
+ message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
+ install(TARGETS cilregcuda
+ LIBRARY DESTINATION lib
+ CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+ )
+ elseif(WIN32)
+ message ("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
+ install(TARGETS cilregcuda
+ RUNTIME DESTINATION bin
+ ARCHIVE DESTINATION lib
+ CONFIGURATIONS ${CMAKE_BUILD_TYPE}
+ )
+ endif()
+ else()
+ # A missing CUDA toolkit is reported but does not stop the configuration.
+ message("CUDA NOT FOUND")
+ endif()
+endif()
+
+# For the Matlab wrapper on Windows the libraries are additionally installed
+# next to the MEX files (MATLAB_DEST).
+# The variable is tested by name, as done for the same option in the parent
+# CMakeLists.txt, so an unset BUILD_MATLAB_WRAPPER simply evaluates to false.
+if (BUILD_MATLAB_WRAPPER)
+    if (WIN32)
+        install(TARGETS cilreg DESTINATION ${MATLAB_DEST})
+        # Test for the target itself: CUDA_FOUND alone does not guarantee
+        # that cilregcuda was created (it is only defined under BUILD_CUDA).
+        if (TARGET cilregcuda)
+            install(TARGETS cilregcuda DESTINATION ${MATLAB_DEST})
+        endif()
+    endif()
+endif()
diff --git a/src/Core/inpainters_CPU/Diffusion_Inpaint_core.c b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.c
new file mode 100644
index 0000000..08b168a
--- /dev/null
+++ b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.c
@@ -0,0 +1,322 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Diffusion_Inpaint_core.h"
+#include "utils.h"
+
+/* Sign function: 1 for a positive argument, -1 for a negative one and
+ * 0 otherwise (zero, or a value that compares neither above nor below zero). */
+int signNDF_inc(float x) {
+    if (x > 0) return 1;
+    if (x < 0) return -1;
+    return 0;
+}
+
+/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Image/volume to inpaint
+ * 2. Mask of the same size as (1) in 'unsigned char' format (ones mark the region to inpaint, zeros belong to the data)
+ * 3. lambda - regularization parameter
+ * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 5. Number of iterations, for explicit scheme >= 150 is recommended
+ * 6. tau - time-marching step for explicit scheme
+ * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Inpainted image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+/* Driver of the diffusion inpainting.
+ *
+ * Input is copied into Output, then iterationsNumb explicit update sweeps are
+ * run in place on Output: the 2D kernels are used when dimZ == 1, the 3D
+ * kernels otherwise; sigmaPar == 0 selects the linear kernel. The nonlinear
+ * kernels receive sigmaPar divided by sqrt(2).
+ *
+ * Nothing is iterated (Output stays a copy of Input) when the mask contains
+ * no element equal to one, or when nonlinear diffusion is requested with a
+ * penaltytype outside 1..3; a message is printed in both cases.
+ *
+ * Returns the first element of Output.
+ */
+float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ)
+{
+    long iter;
+    long long p, totalPoints, pointsone;
+    float sigmaPar2;
+    sigmaPar2 = sigmaPar/sqrt(2.0f);
+
+    /* copy into output */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+    /* the element count is formed in 64 bits: the product of three int
+       dimensions can exceed the range of int for large volumes */
+    totalPoints = (long long)(dimY)*(long long)(dimX)*(long long)(dimZ);
+
+    /* NOTE(review): only mask values equal to 1 are counted here while the
+       kernels treat every value > 0 as "to inpaint" - confirm which of the
+       two conventions is intended */
+    pointsone = 0;
+    for (p=0; p<totalPoints; p++) if (Mask[p] == 1) pointsone++;
+
+    if (pointsone == 0) {
+        printf("%s \n", "Nothing to inpaint, zero mask!");
+        return *Output;
+    }
+
+    /* reject an unknown penalty once, instead of letting the kernels report
+       it for every column of every iteration; Output is left as the copy of
+       Input, exactly as the kernels would leave it */
+    if ((sigmaPar != 0.0f) && ((penaltytype < 1) || (penaltytype > 3))) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+
+    if (dimZ == 1) {
+        /* running 2D diffusion iterations */
+        for(iter=0; iter < iterationsNumb; iter++) {
+            if (sigmaPar == 0.0f) LinearDiff_Inp_2D(Input, Mask, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
+            else NonLinearDiff_Inp_2D(Input, Mask, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
+        }
+    }
+    else {
+        /* running 3D diffusion iterations */
+        for(iter=0; iter < iterationsNumb; iter++) {
+            if (sigmaPar == 0.0f) LinearDiff_Inp_3D(Input, Mask, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+            else NonLinearDiff_Inp_3D(Input, Mask, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+    }
+    return *Output;
+}
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation) */
+/* One explicit sweep over a dimX x dimY image stored with x as the fastest
+ * index. Every pixel whose mask value is > 0 is updated in place in Output
+ * from its four neighbours (mirrored at the image border) and from the
+ * difference to Input; pixels with a zero mask are left untouched.
+ * Returns the first element of Output.
+ *
+ * NOTE(review): neighbouring columns may be updated by other threads during
+ * the same sweep, so the values read from Output can depend on thread
+ * scheduling - confirm that this is acceptable.
+ */
+float LinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY)
+{
+    long i,j,i1,i2,j1,j2,index;
+    float centre,e1,w1,n1,s1;
+
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,centre,e1,w1,n1,s1)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann) */
+        i1 = i+1; if (i1 == dimX) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        /* a dimension of size one has no mirror neighbour: use the pixel
+           itself instead of indexing outside of the row */
+        if (i1 < 0) i1 = i;
+        if (i2 >= dimX) i2 = i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann) */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            if (j1 < 0) j1 = j;
+            if (j2 >= dimY) j2 = j;
+            index = j*dimX+i;
+
+            if (Mask[index] > 0) {
+                /* inpainting process: differences to the four neighbours */
+                centre = Output[index];
+                e1 = Output[j*dimX+i1] - centre;
+                w1 = Output[j*dimX+i2] - centre;
+                n1 = Output[j1*dimX+i] - centre;
+                s1 = Output[j2*dimX+i] - centre;
+
+                Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+            }
+        }}
+    return *Output;
+}
+
+/* nonlinear diffusion */
+/* One explicit sweep over a dimX x dimY image stored with x as the fastest
+ * index. For every pixel whose mask value is > 0 the differences to the four
+ * neighbours (mirrored at the image border) are passed through the selected
+ * penalty (1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight, all controlled by
+ * sigmaPar), summed and used to update Output in place.
+ * For any other penaltytype a message is printed and Output is not modified.
+ * Returns the first element of Output.
+ *
+ * NOTE(review): neighbouring columns may be updated by other threads during
+ * the same sweep, so the values read from Output can depend on thread
+ * scheduling - confirm that this is acceptable.
+ */
+float NonLinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
+{
+    long i,j,i1,i2,j1,j2,index;
+    int m;
+    float centre,flux,g[4];
+
+    /* the penalty is validated once, outside of the parallel loop */
+    if ((penaltytype < 1) || (penaltytype > 3)) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,i1,i2,j1,j2,m,centre,flux,g)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann) */
+        i1 = i+1; if (i1 == dimX) i1 = i-1;
+        i2 = i-1; if (i2 < 0) i2 = i+1;
+        /* a dimension of size one has no mirror neighbour: use the pixel
+           itself instead of indexing outside of the row */
+        if (i1 < 0) i1 = i;
+        if (i2 >= dimX) i2 = i;
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann) */
+            j1 = j+1; if (j1 == dimY) j1 = j-1;
+            j2 = j-1; if (j2 < 0) j2 = j+1;
+            if (j1 < 0) j1 = j;
+            if (j2 >= dimY) j2 = j;
+            index = j*dimX+i;
+
+            if (Mask[index] > 0) {
+                /* inpainting process: differences to east, west, north, south */
+                centre = Output[index];
+                g[0] = Output[j*dimX+i1] - centre;
+                g[1] = Output[j*dimX+i2] - centre;
+                g[2] = Output[j1*dimX+i] - centre;
+                g[3] = Output[j2*dimX+i] - centre;
+
+                flux = 0.0f;
+                for(m=0; m<4; m++) {
+                    if (penaltytype == 1) {
+                        /* Huber penalty */
+                        if (fabsf(g[m]) > sigmaPar) g[m] = (float)signNDF_inc(g[m]);
+                        else g[m] = g[m]/sigmaPar;
+                    }
+                    else if (penaltytype == 2) {
+                        /* Perona-Malik */
+                        g[m] = (g[m])/(1.0f + powf((g[m]/sigmaPar),2));
+                    }
+                    else {
+                        /* Tukey Biweight */
+                        if (fabsf(g[m]) <= sigmaPar) g[m] = g[m]*powf((1.0f - powf((g[m]/sigmaPar),2)), 2);
+                        else g[m] = 0.0f;
+                    }
+                    flux += g[m];
+                }
+                Output[index] += tau*(lambdaPar*flux - (Output[index] - Input[index]));
+            }
+        }}
+    return *Output;
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation) */
+/* One explicit sweep over a dimX x dimY x dimZ volume stored with x as the
+ * fastest and z as the slowest index. Every voxel whose mask value is > 0 is
+ * updated in place in Output from its six neighbours (mirrored at the volume
+ * border) and from the difference to Input.
+ * Returns the first element of Output.
+ *
+ * NOTE(review): neighbouring slices may be updated by other threads during
+ * the same sweep, so the values read from Output can depend on thread
+ * scheduling - confirm that this is acceptable.
+ */
+float LinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
+{
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    float centre,e1,w1,n1,s1,u1,d1;
+
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,k,i1,i2,j1,j2,k1,k2,centre,e1,w1,n1,s1,u1,d1)
+    for(k=0; k<dimZ; k++) {
+        /* symmetric boundary conditions (Neumann); a dimension of size one
+           has no mirror neighbour, the voxel itself is used instead */
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
+        if (k1 < 0) k1 = k;
+        if (k2 >= dimZ) k2 = k;
+        for(i=0; i<dimX; i++) {
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
+            if (i1 < 0) i1 = i;
+            if (i2 >= dimX) i2 = i;
+            for(j=0; j<dimY; j++) {
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                j2 = j-1; if (j2 < 0) j2 = j+1;
+                if (j1 < 0) j1 = j;
+                if (j2 >= dimY) j2 = j;
+                index = (dimX*dimY)*k + j*dimX+i;
+
+                if (Mask[index] > 0) {
+                    /* inpainting process: differences to the six neighbours */
+                    centre = Output[index];
+                    e1 = Output[(dimX*dimY)*k + j*dimX+i1] - centre;
+                    w1 = Output[(dimX*dimY)*k + j*dimX+i2] - centre;
+                    n1 = Output[(dimX*dimY)*k + j1*dimX+i] - centre;
+                    s1 = Output[(dimX*dimY)*k + j2*dimX+i] - centre;
+                    u1 = Output[(dimX*dimY)*k1 + j*dimX+i] - centre;
+                    d1 = Output[(dimX*dimY)*k2 + j*dimX+i] - centre;
+
+                    Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+                }
+            }}}
+    return *Output;
+}
+
+/* nonlinear diffusion */
+/* One explicit sweep over a dimX x dimY x dimZ volume stored with x as the
+ * fastest and z as the slowest index. For every voxel whose mask value is > 0
+ * the differences to the six neighbours (mirrored at the volume border) are
+ * passed through the selected penalty (1 - Huber, 2 - Perona-Malik,
+ * 3 - Tukey Biweight, all controlled by sigmaPar), summed and used to update
+ * Output in place.
+ * For any other penaltytype a message is printed and Output is not modified.
+ * Returns the first element of Output.
+ *
+ * NOTE(review): neighbouring slices may be updated by other threads during
+ * the same sweep, so the values read from Output can depend on thread
+ * scheduling - confirm that this is acceptable.
+ */
+float NonLinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
+{
+    long i,j,k,i1,i2,j1,j2,k1,k2,index;
+    int m;
+    float centre,flux,g[6];
+
+    /* the penalty is validated once, outside of the parallel loop */
+    if ((penaltytype < 1) || (penaltytype > 3)) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+
+#pragma omp parallel for shared(Input,Mask) private(index,i,j,k,i1,i2,j1,j2,k1,k2,m,centre,flux,g)
+    for(k=0; k<dimZ; k++) {
+        /* symmetric boundary conditions (Neumann); a dimension of size one
+           has no mirror neighbour, the voxel itself is used instead */
+        k1 = k+1; if (k1 == dimZ) k1 = k-1;
+        k2 = k-1; if (k2 < 0) k2 = k+1;
+        if (k1 < 0) k1 = k;
+        if (k2 >= dimZ) k2 = k;
+        for(i=0; i<dimX; i++) {
+            i1 = i+1; if (i1 == dimX) i1 = i-1;
+            i2 = i-1; if (i2 < 0) i2 = i+1;
+            if (i1 < 0) i1 = i;
+            if (i2 >= dimX) i2 = i;
+            for(j=0; j<dimY; j++) {
+                j1 = j+1; if (j1 == dimY) j1 = j-1;
+                j2 = j-1; if (j2 < 0) j2 = j+1;
+                if (j1 < 0) j1 = j;
+                if (j2 >= dimY) j2 = j;
+                index = (dimX*dimY)*k + j*dimX+i;
+
+                if (Mask[index] > 0) {
+                    /* inpainting process: differences to east, west, north,
+                       south, up and down */
+                    centre = Output[index];
+                    g[0] = Output[(dimX*dimY)*k + j*dimX+i1] - centre;
+                    g[1] = Output[(dimX*dimY)*k + j*dimX+i2] - centre;
+                    g[2] = Output[(dimX*dimY)*k + j1*dimX+i] - centre;
+                    g[3] = Output[(dimX*dimY)*k + j2*dimX+i] - centre;
+                    g[4] = Output[(dimX*dimY)*k1 + j*dimX+i] - centre;
+                    g[5] = Output[(dimX*dimY)*k2 + j*dimX+i] - centre;
+
+                    flux = 0.0f;
+                    for(m=0; m<6; m++) {
+                        if (penaltytype == 1) {
+                            /* Huber penalty */
+                            if (fabsf(g[m]) > sigmaPar) g[m] = (float)signNDF_inc(g[m]);
+                            else g[m] = g[m]/sigmaPar;
+                        }
+                        else if (penaltytype == 2) {
+                            /* Perona-Malik */
+                            g[m] = (g[m])/(1.0f + powf((g[m]/sigmaPar),2));
+                        }
+                        else {
+                            /* Tukey Biweight */
+                            if (fabsf(g[m]) <= sigmaPar) g[m] = g[m]*powf((1.0f - powf((g[m]/sigmaPar),2)), 2);
+                            else g[m] = 0.0f;
+                        }
+                        flux += g[m];
+                    }
+
+                    Output[index] += tau*(lambdaPar*flux - (Output[index] - Input[index]));
+                }
+            }}}
+    return *Output;
+}
diff --git a/src/Core/inpainters_CPU/Diffusion_Inpaint_core.h b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.h
new file mode 100644
index 0000000..a96fe79
--- /dev/null
+++ b/src/Core/inpainters_CPU/Diffusion_Inpaint_core.h
@@ -0,0 +1,61 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Image/volume to inpaint
+ * 2. Mask of the same size as (1) in 'unsigned char' format (ones mark the region to inpaint, zeros belong to the data)
+ * 3. lambda - regularization parameter
+ * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 5. Number of iterations, for explicit scheme >= 150 is recommended
+ * 6. tau - time-marching step for explicit scheme
+ * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Inpainted image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+
+/* The declarations get C linkage when the header is included from C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point: takes the image/volume, the mask, the output buffer, the
+ * parameters listed above and the three dimensions. */
+CCPI_EXPORT float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
+
+/* Linear and nonlinear diffusion kernels for the 2D (dimX, dimY) and the
+ * 3D (dimX, dimY, dimZ) case; the nonlinear variants additionally take
+ * sigmaPar and penaltytype. */
+CCPI_EXPORT float LinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY);
+CCPI_EXPORT float NonLinearDiff_Inp_2D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY);
+CCPI_EXPORT float LinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float NonLinearDiff_Inp_3D(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
new file mode 100644
index 0000000..b488ca4
--- /dev/null
+++ b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
@@ -0,0 +1,188 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NonlocalMarching_Inpaint_core.h"
+#include "utils.h"
+
+
+/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
+ * The method is heuristic but computationally efficient (especially for larger images).
+ * It was developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms.
+ * The method will NOT work satisfactorily if you have lengthy vertical stripes of missing data.
+ *
+ * Input:
+ * 1. 2D image or sinogram with horizontal or inclined regions of missing data
+ * 2. Mask of the same size as A in 'unsigned char' format (ones mark the region to inpaint, zeros belong to the data)
+ * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice
+ *
+ * Output:
+ * 1. Inpainted image or a sinogram
+ * 2. updated mask
+ *
+ * Reference: D. Kazantsev (paper in preparation)
+ */
+
+/* Driver of the nonlocal marching inpainting (2D only, dimZ is not used).
+ *
+ * M is copied into M_upd and Input into Output; both copies are then updated
+ * in place. In every iteration a table of Gaussian distance weights is built
+ * for the current window, the image is scanned and the pixels found at the
+ * borders of masked runs are filled with inpaint_func and cleared in M_upd.
+ * The window half-size starts at 1 and grows by SW_increment per iteration.
+ *
+ *  - iterationsNumb == 0: the number of iterations is derived from the
+ *    number of ones in M; otherwise iterationsNumb iterations are run.
+ *  - trigger == 0: runs are searched along x for every row (branch labelled
+ *    "Matlab"); otherwise along y for every column (branch labelled "Python").
+ *  - the iterations stop early once M_upd contains only zeros.
+ *
+ * Returns the first element of Output.
+ *
+ * NOTE(review): inpaint_func reads Output and M_upd inside a window that
+ * spans rows/columns processed by other threads of the same parallel loop,
+ * so results may depend on thread scheduling - confirm this is acceptable.
+ */
+float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ)
+{
+    int i, j, i_m, j_m, counter, iter, iterations_number, W_halfsize, W_fullsize, switchmask, switchcurr, counterElements;
+    float *Gauss_weights;
+
+    (void)dimZ; /* kept in the interface only; the method works on 2D data */
+
+    /* copying M to M_upd */
+    copyIm_unchar(M, M_upd, dimX, dimY, 1);
+
+    /* Copying the image */
+    copyIm(Input, Output, dimX, dimY, 1);
+
+    /* Find how many inpainting iterations (equal to the number of ones) required based on a mask */
+    if (iterationsNumb == 0) {
+        iterations_number = 0;
+        for (i=0; i<dimY*dimX; i++) {
+            if (M[i] == 1) iterations_number++;
+        }
+        if ((int)(iterations_number/dimY) > dimX) iterations_number = dimX;
+    }
+    else iterations_number = iterationsNumb;
+
+    if (iterations_number == 0) printf("%s \n", "Nothing to inpaint, zero mask!");
+    else {
+
+        printf("%s %i \n", "Max iteration number equals to:", iterations_number);
+
+        /* Inpainting iterations run here*/
+        W_halfsize = 1;
+        for(iter=0; iter < iterations_number; iter++) {
+
+            /* pre-calculation of Gaussian distance weights */
+            W_fullsize = (int)(2*W_halfsize + 1); /*full size of similarity window */
+            Gauss_weights = (float*)calloc(W_fullsize*W_fullsize,sizeof(float ));
+            if (Gauss_weights == NULL) {
+                /* without the weight table no pixel can be filled: stop
+                   instead of dereferencing a null pointer */
+                printf("%s \n", "Memory allocation for the Gaussian weights failed!");
+                return *Output;
+            }
+            counter = 0;
+            for(i_m=-W_halfsize; i_m<=W_halfsize; i_m++) {
+                for(j_m=-W_halfsize; j_m<=W_halfsize; j_m++) {
+                    Gauss_weights[counter] = exp(-(pow((i_m), 2) + pow((j_m), 2))/(2*W_halfsize*W_halfsize));
+                    counter++;
+                }
+            }
+
+            if (trigger == 0) {
+                /*Matlab*/
+#pragma omp parallel for shared(Output, M_upd, Gauss_weights) private(i, j, switchmask, switchcurr)
+                for(j=0; j<dimY; j++) {
+                    switchmask = 0; /* 1 while inside a run of masked pixels */
+                    for(i=0; i<dimX; i++) {
+                        switchcurr = 0; /* 1 when the current pixel has just been filled */
+                        if ((M_upd[j*dimX + i] == 1) && (switchmask == 0)) {
+                            /* perform inpainting of the current pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i, j, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[j*dimX + i] = 0;
+                            switchmask = 1; switchcurr = 1;
+                        }
+                        if ((M_upd[j*dimX + i] == 0) && (switchmask == 1) && (switchcurr == 0)) {
+                            /* perform inpainting of the previous (i-1) pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i-1, j, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[(j)*dimX + i-1] = 0;
+                            switchmask = 0;
+                        }
+                    }
+                }
+            }
+            else {
+                /*Python*/
+                /* find a point in the mask to inpaint */
+#pragma omp parallel for shared(Output, M_upd, Gauss_weights) private(i, j, switchmask, switchcurr)
+                for(i=0; i<dimX; i++) {
+                    switchmask = 0; /* 1 while inside a run of masked pixels */
+                    for(j=0; j<dimY; j++) {
+                        switchcurr = 0; /* 1 when the current pixel has just been filled */
+                        if ((M_upd[j*dimX + i] == 1) && (switchmask == 0)) {
+                            /* perform inpainting of the current pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i, j, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[j*dimX + i] = 0;
+                            switchmask = 1; switchcurr = 1;
+                        }
+                        if ((M_upd[j*dimX + i] == 0) && (switchmask == 1) && (switchcurr == 0)) {
+                            /* perform inpainting of the previous (j-1) pixel */
+                            inpaint_func(Output, M_upd, Gauss_weights, i, j-1, dimX, dimY, W_halfsize, W_fullsize);
+                            /* add value to the mask*/
+                            M_upd[(j-1)*dimX + i] = 0;
+                            switchmask = 0;
+                        }
+                    }
+                }
+            }
+            free(Gauss_weights);
+
+            /* check if possible to terminate iterations earlier */
+            counterElements = 0;
+            for(i=0; i<dimX*dimY; i++) if (M_upd[i] == 0) counterElements++;
+
+            if (counterElements == dimX*dimY) {
+                printf("%s \n", "Padding completed!");
+                break;
+            }
+            W_halfsize += SW_increment;
+        }
+        printf("%s %i \n", "Iterations stopped at:", iter);
+    }
+    return *Output;
+}
+
+/* Fills pixel (i, j) of U with the weighted mean of the known pixels
+ * (M_upd == 0) found inside the (2*W_halfsize+1)^2 window centred on it.
+ * Gauss_weights holds one weight per window position, laid out with the
+ * x offset as the outer and the y offset as the inner index; positions that
+ * fall outside of the dimX x dimY image are skipped. When the window holds
+ * no known pixel (zero total weight) the pixel is set to 0.
+ * Returns the first element of U.
+ */
+float inpaint_func(float *U, unsigned char *M_upd, float *Gauss_weights, int i, int j, int dimX, int dimY, int W_halfsize, int W_fullsize)
+{
+    int di, dj, col, row, w;
+    float total_weight = 0.0f;
+    float value = 0.0f;
+
+    /* first pass: total weight of the known pixels, used for normalisation */
+    w = 0;
+    for(di=-W_halfsize; di<=W_halfsize; di++) {
+        col = i + di;
+        for(dj=-W_halfsize; dj<=W_halfsize; dj++, w++) {
+            row = j + dj;
+            if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) continue;
+            if (M_upd[row*dimX + col] == 0) total_weight += Gauss_weights[w];
+        }
+    }
+
+    /* second pass: accumulate the normalised contributions */
+    if (total_weight != 0.0f) {
+        w = 0;
+        for(di=-W_halfsize; di<=W_halfsize; di++) {
+            col = i + di;
+            for(dj=-W_halfsize; dj<=W_halfsize; dj++, w++) {
+                row = j + dj;
+                if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) continue;
+                if (M_upd[row*dimX + col] == 0) {
+                    value += (Gauss_weights[w]/total_weight)*U[row*dimX + col];
+                }
+            }
+        }
+    }
+
+    U[j*dimX + i] = value;
+    return *U;
+}
+
diff --git a/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
new file mode 100644
index 0000000..0f99ed4
--- /dev/null
+++ b/src/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.h
@@ -0,0 +1,54 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
+ * The method is heuristic but computationally efficient (especially for larger images).
+ * It was developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms.
+ * The method will NOT work satisfactorily if you have lengthy vertical stripes of missing data.
+ *
+ * Inputs:
+ * 1. 2D image or sinogram with horizontal or inclined regions of missing data
+ * 2. Mask of the same size as A in 'unsigned char' format (ones mark the region to inpaint, zeros belong to the data)
+ * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice
+
+ * Output:
+ * 1. Inpainted image or a sinogram
+ * 2. updated mask
+ *
+ * Reference: TBA
+ */
+
+
+/* Give the exported functions C linkage when this header is included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point of the inpainting method described in the comment above */
+CCPI_EXPORT float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ);
+/* Overwrites pixel (i,j) of U with a weighted average of the pixels whose M_upd entry is zero
+ * inside a window of half-size W_halfsize; returns the first element of U */
+CCPI_EXPORT float inpaint_func(float *U, unsigned char *M_upd, float *Gauss_weights, int i, int j, int dimX, int dimY, int W_halfsize, int W_fullsize);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/Diffus4th_order_core.c b/src/Core/regularisers_CPU/Diffus4th_order_core.c
new file mode 100644
index 0000000..01f4f64
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffus4th_order_core.c
@@ -0,0 +1,250 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Diffus4th_order_core.h"
+#include "utils.h"
+
+#define EPS 1.0e-7
+
+/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma)
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for the explicit scheme
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+/* Driver of the fourth-order diffusion regulariser.
+ *
+ * Input      - noisy image/volume of dimX*dimY*dimZ floats (read only)
+ * Output     - buffer of the same size; receives a copy of Input and is then
+ *              updated in place by iterationsNumb explicit steps
+ * lambdaPar  - regularisation parameter
+ * sigmaPar   - edge-preserving parameter; it is squared here before being
+ *              handed to the weighted-Laplacian routines
+ * tau        - time-marching step
+ * dimZ == 1 selects the 2D routines, any other value the 3D routines.
+ *
+ * Returns the first element of Output. If the work buffer cannot be
+ * allocated an error is reported on stderr and Output is left as the
+ * unmodified copy of Input.
+ */
+float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
+{
+    int i;
+    size_t DimTotal;
+    float sigmaPar2;
+    float *W_Lapl = NULL;
+
+    sigmaPar2 = sigmaPar*sigmaPar;
+    /* widen before multiplying: the product of three ints can overflow int for large volumes */
+    DimTotal = (size_t)dimX*(size_t)dimY*(size_t)dimZ;
+
+    /* copy into output */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+    /* work buffer holding the weighted Laplacian of the current iterate */
+    W_Lapl = calloc(DimTotal, sizeof(float));
+    if (W_Lapl == NULL) {
+        /* never hand a NULL buffer to the stencil routines */
+        fprintf(stderr, "%s \n", "Diffus4th_CPU_main: failed to allocate the weighted Laplacian buffer");
+        return *Output;
+    }
+
+    if (dimZ == 1) {
+        /* running 2D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            /* Calculating weighted Laplacian */
+            Weighted_Laplc2D(W_Lapl, Output, sigmaPar2, (long)(dimX), (long)(dimY));
+            /* Perform iteration step */
+            Diffusion_update_step2D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY));
+        }
+    }
+    else {
+        /* running 3D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            /* Calculating weighted Laplacian */
+            Weighted_Laplc3D(W_Lapl, Output, sigmaPar2, (long)(dimX), (long)(dimY), (long)(dimZ));
+            /* Perform iteration step */
+            Diffusion_update_step3D(Output, Input, W_Lapl, lambdaPar, sigmaPar2, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+    }
+    free(W_Lapl);
+    return *Output;
+}
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* Computes the weighted Laplacian of the 2D image U0 and stores it in W_Lapl.
+ * For every pixel, first, second and mixed differences are formed from the
+ * neighbours (mirrored at the borders), combined into the two terms V_norm and
+ * V_orth, and blended as W_Lapl = c^2*V_norm + c*V_orth with
+ * c = 1/(1 + (gradX^2 + gradY^2)/sigma).
+ * Diffus4th_CPU_main passes the squared edge-preserving parameter as sigma.
+ * Returns the first element of W_Lapl.
+ * NOTE(review): for dimX == 1 or dimY == 1 the mirrored neighbour index becomes -1
+ * (and the other one equals the dimension) - confirm callers never pass such sizes. */
+float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY)
+{
+ long i,j,i1,i2,j1,j2,index;
+ float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
+
+ /* each (i,j) writes only its own W_Lapl[index] and U0 is only read */
+ #pragma omp parallel for shared(W_Lapl) private(i,j,i1,i2,j1,j2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq)
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+
+ index = j*dimX+i;
+
+ /* halved first differences (previous minus next neighbour) and their squares */
+ gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
+ gradX_sq = pow(gradX,2);
+
+ gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
+ gradY_sq = pow(gradY,2);
+
+ /* second differences along x and along y */
+ gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
+ gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
+
+ /* mixed difference built from the four diagonal neighbours */
+ gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
+ xy_2 = 2.0f*gradX*gradY*gradXY;
+
+ denom = gradX_sq + gradY_sq;
+
+ /* when the squared gradient magnitude is at most EPS, divide by EPS instead */
+ if (denom <= EPS) {
+ V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
+ V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS;
+ }
+ else {
+ V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
+ V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;
+ }
+
+ /* weight computed from the squared gradient magnitude and sigma */
+ c = 1.0f/(1.0f + denom/sigma);
+ c_sq = c*c;
+
+ W_Lapl[index] = c_sq*V_norm + c*V_orth;
+ }
+ }
+ return *W_Lapl;
+}
+
+/* Performs one explicit update of the 2D iterate:
+ * Output += tau*( -lambdaPar*(second difference of W_Lapl in x + in y) - (Output - Input) ).
+ * W_Lapl and Input are only read; each pixel writes only its own Output entry.
+ * The sigmaPar2 argument is not used in this function.
+ * Returns the first element of Output. */
+float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY)
+{
+ long i,j,i1,i2,j1,j2,index;
+ float gradXXc, gradYYc;
+
+ #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,index,gradXXc,gradYYc)
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ index = j*dimX+i;
+
+ /* second differences of the weighted Laplacian along x and y */
+ gradXXc = W_Lapl[j*dimX+i2] + W_Lapl[j*dimX+i1] - 2*W_Lapl[index];
+ gradYYc = W_Lapl[j2*dimX+i] + W_Lapl[j1*dimX+i] - 2*W_Lapl[index];
+
+ /* regularisation term plus the term pulling Output towards Input */
+ Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc) - (Output[index] - Input[index]));
+ }
+ }
+ return *Output;
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* 3D counterpart of the weighted Laplacian: for every voxel of U0 the first,
+ * second and mixed differences along x, y and z are formed from neighbours
+ * (mirrored at the borders), combined into V_norm and V_orth, and blended as
+ * W_Lapl = c^2*V_norm + c*V_orth with c = 1/(1 + (gradX^2 + gradY^2 + gradZ^2)/sigma).
+ * Voxels are addressed as (dimX*dimY)*k + j*dimX + i.
+ * Diffus4th_CPU_main passes the squared edge-preserving parameter as sigma.
+ * Returns the first element of W_Lapl.
+ * NOTE(review): a dimension of size 1 makes the mirrored neighbour index -1 - confirm callers avoid it. */
+float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY, long dimZ)
+{
+ long i,j,k,i1,i2,j1,j2,k1,k2,index;
+ float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
+
+ /* each (i,j,k) writes only its own W_Lapl[index] and U0 is only read */
+ #pragma omp parallel for shared(W_Lapl) private(i,j,k,i1,i2,j1,j2,k1,k2,index,gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2)
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+
+ for(k=0; k<dimZ; k++) {
+ /* symmetric boundary conditions */
+ k1 = k+1; if (k1 == dimZ) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ /* halved first differences (previous minus next neighbour) and their squares */
+ gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
+ gradX_sq = pow(gradX,2);
+
+ gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
+ gradY_sq = pow(gradY,2);
+
+ gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
+ gradZ_sq = pow(gradZ,2);
+
+ /* second differences along each axis */
+ gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
+ gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
+ gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
+
+ /* mixed differences in the xy, xz and yz planes */
+ gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
+ gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
+ gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
+
+ xy_2 = 2.0f*gradX*gradY*gradXY;
+ xyz_1 = 2.0f*gradX*gradZ*gradXZ;
+ xyz_2 = 2.0f*gradY*gradZ*gradYZ;
+
+ denom = gradX_sq + gradY_sq + gradZ_sq;
+
+ /* when the squared gradient magnitude is at most EPS, divide by EPS instead */
+ if (denom <= EPS) {
+ V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
+ V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
+ }
+ else {
+ V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
+ V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
+ }
+
+ /* weight computed from the squared gradient magnitude and sigma */
+ c = 1.0f/(1.0f + denom/sigma);
+ c_sq = c*c;
+
+ W_Lapl[index] = c_sq*V_norm + c*V_orth;
+ }
+ }
+ }
+ return *W_Lapl;
+}
+
+/* Performs one explicit update of the 3D iterate:
+ * Output += tau*( -lambdaPar*(second difference of W_Lapl in x + y + z) - (Output - Input) ).
+ * W_Lapl and Input are only read; each voxel writes only its own Output entry.
+ * The sigmaPar2 argument is not used in this function.
+ * Returns the first element of Output. */
+float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ)
+{
+ long i,j,i1,i2,j1,j2,index,k,k1,k2;
+ float gradXXc, gradYYc, gradZZc;
+
+ #pragma omp parallel for shared(Output, Input, W_Lapl) private(i,j,i1,i2,j1,j2,k,k1,k2,index,gradXXc,gradYYc,gradZZc)
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+
+ for(k=0; k<dimZ; k++) {
+ /* symmetric boundary conditions */
+ k1 = k+1; if (k1 == dimZ) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ /* second differences of the weighted Laplacian along x, y and z */
+ gradXXc = W_Lapl[(dimX*dimY)*k + j*dimX+i2] + W_Lapl[(dimX*dimY)*k + j*dimX+i1] - 2*W_Lapl[index];
+ gradYYc = W_Lapl[(dimX*dimY)*k + j2*dimX+i] + W_Lapl[(dimX*dimY)*k + j1*dimX+i] - 2*W_Lapl[index];
+ gradZZc = W_Lapl[(dimX*dimY)*k2 + j*dimX+i] + W_Lapl[(dimX*dimY)*k1 + j*dimX+i] - 2*W_Lapl[index];
+
+ /* regularisation term plus the term pulling Output towards Input */
+ Output[index] += tau*(-lambdaPar*(gradXXc + gradYYc + gradZZc) - (Output[index] - Input[index]));
+ }
+ }
+ }
+ return *Output;
+}
diff --git a/src/Core/regularisers_CPU/Diffus4th_order_core.h b/src/Core/regularisers_CPU/Diffus4th_order_core.h
new file mode 100644
index 0000000..d81afcb
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffus4th_order_core.h
@@ -0,0 +1,55 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma)
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for explicit scheme
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+/* Give the exported functions C linkage when this header is included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point: copies Input to Output and runs iterationsNumb explicit steps (2D when dimZ == 1, 3D otherwise) */
+CCPI_EXPORT float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+/* Fills W_Lapl with the weighted Laplacian of the 2D image U0 */
+CCPI_EXPORT float Weighted_Laplc2D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY);
+/* One explicit 2D update of Output from W_Lapl and Input */
+CCPI_EXPORT float Diffusion_update_step2D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY);
+/* Fills W_Lapl with the weighted Laplacian of the 3D volume U0 */
+CCPI_EXPORT float Weighted_Laplc3D(float *W_Lapl, float *U0, float sigma, long dimX, long dimY, long dimZ);
+/* One explicit 3D update of Output from W_Lapl and Input */
+CCPI_EXPORT float Diffusion_update_step3D(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/Diffusion_core.c b/src/Core/regularisers_CPU/Diffusion_core.c
new file mode 100644
index 0000000..b765796
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffusion_core.c
@@ -0,0 +1,307 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Diffusion_core.h"
+#include "utils.h"
+
+#define EPS 1.0e-5
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/*sign function*/
+/* Returns 1 for a positive x, -1 for a negative x and 0 otherwise */
+int signNDFc(float x) {
+    if (x > 0) return 1;
+    if (x < 0) return -1;
+    return 0;
+}
+
+/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+/* Driver of the linear / nonlinear diffusion regulariser.
+ *
+ * Input       - noisy image/volume of dimX*dimY*dimZ floats (read only)
+ * Output      - buffer of the same size; receives a copy of Input and is then
+ *               updated in place by iterationsNumb explicit steps
+ * lambdaPar   - regularisation parameter
+ * sigmaPar    - edge-preserving parameter; exactly zero selects linear
+ *               diffusion, otherwise sigmaPar/sqrt(2) is handed to the
+ *               nonlinear routines
+ * tau         - time-marching step
+ * penaltytype - 1 Huber, 2 Perona-Malik, 3 Tukey Biweight (nonlinear case only)
+ * dimZ == 1 selects the 2D routines, any other value the 3D routines.
+ *
+ * Returns the first element of Output. For the nonlinear case an unknown
+ * penaltytype is reported once and Output is left as the unmodified copy of
+ * Input, instead of repeating the message from inside the pixel loops on
+ * every iteration.
+ */
+float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ)
+{
+    int i;
+    float sigmaPar2;
+    sigmaPar2 = sigmaPar/sqrt(2.0f);
+
+    /* copy into output */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+    /* validate the penalty selector once, before any iteration is run */
+    if ((sigmaPar != 0.0f) && ((penaltytype < 1) || (penaltytype > 3))) {
+        printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+        return *Output;
+    }
+
+    if (dimZ == 1) {
+        /* running 2D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            if (sigmaPar == 0.0f) LinearDiff2D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY)); /* linear diffusion (heat equation) */
+            else NonLinearDiff2D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY)); /* nonlinear diffusion */
+        }
+    }
+    else {
+        /* running 3D diffusion iterations */
+        for(i=0; i < iterationsNumb; i++) {
+            if (sigmaPar == 0.0f) LinearDiff3D(Input, Output, lambdaPar, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+            else NonLinearDiff3D(Input, Output, lambdaPar, sigmaPar2, tau, penaltytype, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+    }
+    return *Output;
+}
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation) */
+/* One explicit step of linear diffusion on a 2D image:
+ * Output += tau*( lambdaPar*(sum of the four neighbour differences) - (Output - Input) ).
+ * Input is only read; Output is updated in place. Returns the first element of Output.
+ * NOTE(review): Output is read at neighbouring pixels and written at the current pixel
+ * inside the same parallel loop, so a neighbour read may return either its old or its
+ * already updated value depending on thread scheduling - confirm this is acceptable. */
+float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY)
+{
+ long i,j,i1,i2,j1,j2,index;
+ float e,w,n,s,e1,w1,n1,s1;
+
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions (Neumann) */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions (Neumann) */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ index = j*dimX+i;
+
+ /* the four neighbours: e/w along x (i+1, i-1), n/s along y (j+1, j-1) */
+ e = Output[j*dimX+i1];
+ w = Output[j*dimX+i2];
+ n = Output[j1*dimX+i];
+ s = Output[j2*dimX+i];
+
+ /* differences between each neighbour and the current pixel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+ }}
+ return *Output;
+}
+
+/* nonlinear diffusion */
+/* One explicit step of nonlinear diffusion on a 2D image. The four neighbour
+ * differences are passed through the function selected by penaltytype
+ * (1 Huber, 2 Perona-Malik, 3 Tukey Biweight) and then used in
+ * Output += tau*( lambdaPar*(sum of transformed differences) - (Output - Input) ).
+ * Diffusion_CPU_main passes sigmaPar/sqrt(2) of the user value as sigmaPar.
+ * Input is only read; Output is updated in place. Returns the first element of Output.
+ * NOTE(review): Output is read at neighbouring pixels and written at the current pixel
+ * inside the same parallel loop, so a neighbour read may return either its old or its
+ * already updated value depending on thread scheduling - confirm this is acceptable. */
+float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY)
+{
+ long i,j,i1,i2,j1,j2,index;
+ float e,w,n,s,e1,w1,n1,s1;
+
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1)
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions (Neumann) */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions (Neumann) */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ index = j*dimX+i;
+
+ /* the four neighbours: e/w along x (i+1, i-1), n/s along y (j+1, j-1) */
+ e = Output[j*dimX+i1];
+ w = Output[j*dimX+i2];
+ n = Output[j1*dimX+i];
+ s = Output[j2*dimX+i];
+
+ /* differences between each neighbour and the current pixel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+
+ if (penaltytype == 1){
+ /* Huber penalty: d/sigma while |d| <= sigma, sign(d) beyond it */
+ if (fabs(e1) > sigmaPar) e1 = signNDFc(e1);
+ else e1 = e1/sigmaPar;
+
+ if (fabs(w1) > sigmaPar) w1 = signNDFc(w1);
+ else w1 = w1/sigmaPar;
+
+ if (fabs(n1) > sigmaPar) n1 = signNDFc(n1);
+ else n1 = n1/sigmaPar;
+
+ if (fabs(s1) > sigmaPar) s1 = signNDFc(s1);
+ else s1 = s1/sigmaPar;
+ }
+ else if (penaltytype == 2) {
+ /* Perona-Malik: d/(1 + (d/sigma)^2) */
+ e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+ w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+ n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+ s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+ }
+ else if (penaltytype == 3) {
+ /* Tukey Biweight: d*(1 - (d/sigma)^2)^2 while |d| <= sigma, zero beyond it */
+ if (fabs(e1) <= sigmaPar) e1 = e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+ else e1 = 0.0f;
+ if (fabs(w1) <= sigmaPar) w1 = w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+ else w1 = 0.0f;
+ if (fabs(n1) <= sigmaPar) n1 = n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+ else n1 = 0.0f;
+ if (fabs(s1) <= sigmaPar) s1 = s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+ else s1 = 0.0f;
+ }
+ else {
+ /* unknown selector: the break leaves only the inner j loop, so no pixel of this
+ * column is updated and the message is printed again for every value of i */
+ printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+ break;
+ }
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+ }}
+ return *Output;
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* linear diffusion (heat equation) */
+/* One explicit step of linear diffusion on a 3D volume:
+ * Output += tau*( lambdaPar*(sum of the six neighbour differences) - (Output - Input) ).
+ * Voxels are addressed as (dimX*dimY)*k + j*dimX + i.
+ * Input is only read; Output is updated in place. Returns the first element of Output.
+ * NOTE(review): Output is read at neighbouring voxels and written at the current voxel
+ * inside the same parallel loop, so a neighbour read may return either its old or its
+ * already updated value depending on thread scheduling - confirm this is acceptable. */
+float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ)
+{
+ long i,j,k,i1,i2,j1,j2,k1,k2,index;
+ float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
+for(k=0; k<dimZ; k++) {
+ /* mirrored neighbour slices at the first and last k */
+ k1 = k+1; if (k1 == dimZ) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions (Neumann) */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions (Neumann) */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ /* the six neighbours: e/w along x, n/s along y, u/d along z */
+ e = Output[(dimX*dimY)*k + j*dimX+i1];
+ w = Output[(dimX*dimY)*k + j*dimX+i2];
+ n = Output[(dimX*dimY)*k + j1*dimX+i];
+ s = Output[(dimX*dimY)*k + j2*dimX+i];
+ u = Output[(dimX*dimY)*k1 + j*dimX+i];
+ d = Output[(dimX*dimY)*k2 + j*dimX+i];
+
+ /* differences between each neighbour and the current voxel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+ u1 = u - Output[index];
+ d1 = d - Output[index];
+
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+ }}}
+ return *Output;
+}
+
+/* One explicit step of nonlinear diffusion on a 3D volume. The six neighbour
+ * differences are passed through the function selected by penaltytype
+ * (1 Huber, 2 Perona-Malik, 3 Tukey Biweight) and then used in
+ * Output += tau*( lambdaPar*(sum of transformed differences) - (Output - Input) ).
+ * Voxels are addressed as (dimX*dimY)*k + j*dimX + i.
+ * Diffusion_CPU_main passes sigmaPar/sqrt(2) of the user value as sigmaPar.
+ * Input is only read; Output is updated in place. Returns the first element of Output.
+ * NOTE(review): Output is read at neighbouring voxels and written at the current voxel
+ * inside the same parallel loop, so a neighbour read may return either its old or its
+ * already updated value depending on thread scheduling - confirm this is acceptable. */
+float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ)
+{
+ long i,j,k,i1,i2,j1,j2,k1,k2,index;
+ float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+
+#pragma omp parallel for shared(Input) private(index,i,j,i1,i2,j1,j2,e,w,n,s,e1,w1,n1,s1,k,k1,k2,u1,d1,u,d)
+for(k=0; k<dimZ; k++) {
+ /* mirrored neighbour slices at the first and last k */
+ k1 = k+1; if (k1 == dimZ) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+ for(i=0; i<dimX; i++) {
+ /* symmetric boundary conditions (Neumann) */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ for(j=0; j<dimY; j++) {
+ /* symmetric boundary conditions (Neumann) */
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ index = (dimX*dimY)*k + j*dimX+i;
+
+ /* the six neighbours: e/w along x, n/s along y, u/d along z */
+ e = Output[(dimX*dimY)*k + j*dimX+i1];
+ w = Output[(dimX*dimY)*k + j*dimX+i2];
+ n = Output[(dimX*dimY)*k + j1*dimX+i];
+ s = Output[(dimX*dimY)*k + j2*dimX+i];
+ u = Output[(dimX*dimY)*k1 + j*dimX+i];
+ d = Output[(dimX*dimY)*k2 + j*dimX+i];
+
+ /* differences between each neighbour and the current voxel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+ u1 = u - Output[index];
+ d1 = d - Output[index];
+
+ if (penaltytype == 1){
+ /* Huber penalty: d/sigma while |d| <= sigma, sign(d) beyond it */
+ if (fabs(e1) > sigmaPar) e1 = signNDFc(e1);
+ else e1 = e1/sigmaPar;
+
+ if (fabs(w1) > sigmaPar) w1 = signNDFc(w1);
+ else w1 = w1/sigmaPar;
+
+ if (fabs(n1) > sigmaPar) n1 = signNDFc(n1);
+ else n1 = n1/sigmaPar;
+
+ if (fabs(s1) > sigmaPar) s1 = signNDFc(s1);
+ else s1 = s1/sigmaPar;
+
+ if (fabs(u1) > sigmaPar) u1 = signNDFc(u1);
+ else u1 = u1/sigmaPar;
+
+ if (fabs(d1) > sigmaPar) d1 = signNDFc(d1);
+ else d1 = d1/sigmaPar;
+ }
+ else if (penaltytype == 2) {
+ /* Perona-Malik: d/(1 + (d/sigma)^2) */
+ e1 = (e1)/(1.0f + powf((e1/sigmaPar),2));
+ w1 = (w1)/(1.0f + powf((w1/sigmaPar),2));
+ n1 = (n1)/(1.0f + powf((n1/sigmaPar),2));
+ s1 = (s1)/(1.0f + powf((s1/sigmaPar),2));
+ u1 = (u1)/(1.0f + powf((u1/sigmaPar),2));
+ d1 = (d1)/(1.0f + powf((d1/sigmaPar),2));
+ }
+ else if (penaltytype == 3) {
+ /* Tukey Biweight: d*(1 - (d/sigma)^2)^2 while |d| <= sigma, zero beyond it */
+ if (fabs(e1) <= sigmaPar) e1 = e1*powf((1.0f - powf((e1/sigmaPar),2)), 2);
+ else e1 = 0.0f;
+ if (fabs(w1) <= sigmaPar) w1 = w1*powf((1.0f - powf((w1/sigmaPar),2)), 2);
+ else w1 = 0.0f;
+ if (fabs(n1) <= sigmaPar) n1 = n1*powf((1.0f - powf((n1/sigmaPar),2)), 2);
+ else n1 = 0.0f;
+ if (fabs(s1) <= sigmaPar) s1 = s1*powf((1.0f - powf((s1/sigmaPar),2)), 2);
+ else s1 = 0.0f;
+ if (fabs(u1) <= sigmaPar) u1 = u1*powf((1.0f - powf((u1/sigmaPar),2)), 2);
+ else u1 = 0.0f;
+ if (fabs(d1) <= sigmaPar) d1 = d1*powf((1.0f - powf((d1/sigmaPar),2)), 2);
+ else d1 = 0.0f;
+ }
+ else {
+ /* unknown selector: the break leaves only the innermost j loop, so no voxel of this
+ * (k,i) line is updated and the message is printed again for every (k,i) pair */
+ printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+ break;
+ }
+
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+ }}}
+ return *Output;
+}
diff --git a/src/Core/regularisers_CPU/Diffusion_core.h b/src/Core/regularisers_CPU/Diffusion_core.h
new file mode 100644
index 0000000..cc36dad
--- /dev/null
+++ b/src/Core/regularisers_CPU/Diffusion_core.h
@@ -0,0 +1,59 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+
+/* Give the exported functions C linkage when this header is included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point: copies Input to Output and runs iterationsNumb explicit steps (2D when dimZ == 1, 3D otherwise) */
+CCPI_EXPORT float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
+/* One explicit step of linear diffusion on a 2D image, updating Output in place */
+CCPI_EXPORT float LinearDiff2D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY);
+/* One explicit step of nonlinear diffusion on a 2D image with the penalty chosen by penaltytype */
+CCPI_EXPORT float NonLinearDiff2D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY);
+/* One explicit step of linear diffusion on a 3D volume, updating Output in place */
+CCPI_EXPORT float LinearDiff3D(float *Input, float *Output, float lambdaPar, float tau, long dimX, long dimY, long dimZ);
+/* One explicit step of nonlinear diffusion on a 3D volume with the penalty chosen by penaltytype */
+CCPI_EXPORT float NonLinearDiff3D(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/FGP_TV_core.c b/src/Core/regularisers_CPU/FGP_TV_core.c
new file mode 100644
index 0000000..68d58b7
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_TV_core.c
@@ -0,0 +1,321 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+/* FGP-TV driver: repeats the Obj/Grad/Proj/Rupd steps on Input and leaves the
+ * regularised image/volume in Output (dimZ <= 1 selects the 2D code path).
+ *
+ * Input, Output  - buffers of dimX*dimY (2D) or dimX*dimY*dimZ (3D) floats
+ * lambdaPar      - regularisation parameter
+ * iterationsNumb - maximum number of iterations
+ * epsil          - tolerance on ||Output - Output_prev|| / ||Output||
+ * methodTV       - 0 selects the isotropic projection, anything else the anisotropic one
+ * nonneg         - 1 clamps negative Output values to zero on every iteration
+ * printM         - 1 prints the iteration index at which the loop ended
+ *
+ * Returns Output[0]. Returns 0.0f without touching Output when dimX or dimY is
+ * not positive or when a work buffer cannot be allocated.
+ */
+float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll;
+    long j, DimTotal;
+    float re, re1;
+    float tk = 1.0f;
+    float tkp1 = 1.0f;
+    int count = 0;
+
+    /* an empty image has nothing to filter and no Output[0] to return */
+    if (dimX <= 0 || dimY <= 0) return 0.0f;
+
+    if (dimZ <= 1) {
+        /* 2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+        /* widen each factor first: dimX*dimY evaluated in int can overflow */
+        DimTotal = (long)dimX*(long)dimY;
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+
+        if (Output_prev == NULL || P1 == NULL || P2 == NULL || P1_prev == NULL || P2_prev == NULL || R1 == NULL || R2 == NULL) {
+            fprintf(stderr, "FGP-TV: failed to allocate work buffers\n");
+            /* free(NULL) is a no-op, so every pointer can be released unconditionally */
+            free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+            return 0.0f;
+        }
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+
+            /* computing the gradient of the objective function */
+            Obj_func2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /* taking a step towards minus of the gradient */
+            Grad_func2D(P1, P2, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* projection step */
+            Proj_func2D(P1, P2, methodTV, DimTotal);
+
+            /* updating R and t */
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+            /* early stopping: relative change between successive outputs */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++)
+            {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* count is cumulative: the hits need not be consecutive */
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /* storing old values */
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2);
+    }
+    else {
+        /* 3D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+        /* widen each factor first: dimX*dimY*dimZ evaluated in int can overflow */
+        DimTotal = (long)dimX*(long)dimY*(long)dimZ;
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P3 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        P3_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        R3 = calloc(DimTotal, sizeof(float));
+
+        if (Output_prev == NULL || P1 == NULL || P2 == NULL || P3 == NULL || P1_prev == NULL || P2_prev == NULL || P3_prev == NULL || R1 == NULL || R2 == NULL || R3 == NULL) {
+            fprintf(stderr, "FGP-TV: failed to allocate work buffers\n");
+            free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
+            return 0.0f;
+        }
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+
+            /* computing the gradient of the objective function */
+            Obj_func3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /* taking a step towards minus of the gradient */
+            Grad_func3D(P1, P2, P3, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* projection step */
+            Proj_func3D(P1, P2, P3, methodTV, DimTotal);
+
+            /* updating R and t */
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_func3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+
+            /* early stopping: relative change between successive outputs */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++)
+            {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* count is cumulative: the hits need not be consecutive */
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /* storing old values */
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3);
+    }
+    return *Output;
+}
+
+/* Fills D with A - lambda*(backward difference of R1 along x + backward
+ * difference of R2 along y). The neighbour that would fall before the first
+ * column/row is taken as zero. Returns D[0]. */
+float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+    long i;
+#pragma omp parallel for shared(A,D,R1,R2)
+    for (i = 0; i < dimX; i++) {
+        long j;   /* block-local, hence private to each thread */
+        for (j = 0; j < dimY; j++) {
+            const long index = j*dimX + i;
+            const float prevX = (i == 0) ? 0.0f : R1[index - 1];
+            const float prevY = (j == 0) ? 0.0f : R2[index - dimX];
+            D[index] = A[index] - lambda*(R1[index] + R2[index] - prevX - prevY);
+        }
+    }
+    return *D;
+}
+/* Writes P = R + (1/(8*lambda)) * (forward difference of D), one component per
+ * axis. The difference is zero on the last column (x) and last row (y).
+ * Always returns 1. */
+float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+    const float step = (1.0f/(8.0f*lambda));
+    long i;
+#pragma omp parallel for shared(P1,P2,D,R1,R2)
+    for (i = 0; i < dimX; i++) {
+        long j;
+        for (j = 0; j < dimY; j++) {
+            const long index = j*dimX + i;
+            float diffX = 0.0f;
+            float diffY = 0.0f;
+            if (i != dimX-1) diffX = D[index] - D[index + 1];
+            if (j != dimY-1) diffY = D[index] - D[index + dimX];
+            P1[index] = R1[index] + step*diffX;
+            P2[index] = R2[index] + step*diffY;
+        }
+    }
+    return 1;
+}
+/* In-place projection of the pair (P1,P2), element by element.
+ * methTV == 0: when P1^2 + P2^2 exceeds 1 the pair is divided by its norm.
+ * otherwise:   each component is divided by max(1, |component|).
+ * Always returns 1. */
+float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal)
+{
+    long i;
+    if (methTV != 0) {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2)
+        for (i = 0; i < DimTotal; i++) {
+            float absP1 = fabs(P1[i]);
+            float absP2 = fabs(P2[i]);
+            if (absP1 < 1.0f) absP1 = 1.0f;
+            if (absP2 < 1.0f) absP2 = 1.0f;
+            P1[i] = P1[i]/absP1;
+            P2[i] = P2[i]/absP2;
+        }
+        return 1;
+    }
+    /* isotropic TV */
+#pragma omp parallel for shared(P1,P2)
+    for (i = 0; i < DimTotal; i++) {
+        const float normSq = powf(P1[i],2) + powf(P2[i],2);
+        if (normSq > 1.0f) {
+            const float scale = 1.0f/sqrtf(normSq);
+            P1[i] = P1[i]*scale;
+            P2[i] = P2[i]*scale;
+        }
+    }
+    return 1;
+}
+/* Extrapolation step: R = P + ((tk-1)/tkp1) * (P - P_old) for both components.
+ * Always returns 1. */
+float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
+{
+    const float momentum = ((tk-1.0f)/tkp1);
+    long n;
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2)
+    for (n = 0; n < DimTotal; n++) {
+        const float deltaP1 = P1[n] - P1_old[n];
+        R1[n] = P1[n] + momentum*deltaP1;
+        /* second component is read only after R1[n] has been stored */
+        R2[n] = P2[n] + momentum*(P2[n] - P2_old[n]);
+    }
+    return 1;
+}
+
+/* 3D-case related Functions */
+/*****************************************************************/
+/* 3D counterpart of Obj_func2D: D = A - lambda*(sum of the backward differences
+ * of R1, R2, R3 along x, y, z). The neighbour that would fall before the first
+ * index of an axis is taken as zero. Returns D[0]. */
+float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
+{
+    const long slice = dimX*dimY;   /* elements per z-slice */
+    long i;
+#pragma omp parallel for shared(A,D,R1,R2,R3)
+    for (i = 0; i < dimX; i++) {
+        long j, k;
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const long index = slice*k + j*dimX + i;
+                const float prevX = (i == 0) ? 0.0f : R1[index - 1];
+                const float prevY = (j == 0) ? 0.0f : R2[index - dimX];
+                const float prevZ = (k == 0) ? 0.0f : R3[index - slice];
+                D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - prevX - prevY - prevZ);
+            }
+        }
+    }
+    return *D;
+}
+/* 3D counterpart of Grad_func2D: P = R + (1/(26*lambda)) * (forward difference
+ * of D) per axis; the difference is zero on the last index of each axis.
+ * Always returns 1. */
+float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
+{
+    const float step = (1.0f/(26.0f*lambda));
+    const long slice = dimX*dimY;   /* elements per z-slice */
+    long i;
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3)
+    for (i = 0; i < dimX; i++) {
+        long j, k;
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const long index = slice*k + j*dimX + i;
+                float diffX = 0.0f, diffY = 0.0f, diffZ = 0.0f;
+                if (i != dimX-1) diffX = D[index] - D[index + 1];
+                if (j != dimY-1) diffY = D[index] - D[index + dimX];
+                if (k != dimZ-1) diffZ = D[index] - D[index + slice];
+                P1[index] = R1[index] + step*diffX;
+                P2[index] = R2[index] + step*diffY;
+                P3[index] = R3[index] + step*diffZ;
+            }
+        }
+    }
+    return 1;
+}
+/* In-place projection of the triple (P1,P2,P3), element by element.
+ * methTV == 0: when P1^2 + P2^2 + P3^2 exceeds 1 the triple is divided by its norm.
+ * otherwise:   each component is divided by max(1, |component|).
+ * Always returns 1. */
+float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
+{
+    float val1, val2, val3, denom, sq_denom;
+    long i;
+    if (methTV == 0) {
+        /* isotropic TV */
+        /* denom must be private: it is written by every iteration, and leaving it
+         * shared lets one thread overwrite it between another's compute and test */
+#pragma omp parallel for shared(P1,P2,P3) private(i,denom,sq_denom)
+        for(i=0; i<DimTotal; i++) {
+            denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
+    else {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
+        for(i=0; i<DimTotal; i++) {
+            val1 = fabs(P1[i]);
+            val2 = fabs(P2[i]);
+            val3 = fabs(P3[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
+    return 1;
+}
+/* Extrapolation step for three components:
+ * R = P + ((tk-1)/tkp1) * (P - P_old). Always returns 1. */
+float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
+{
+    const float momentum = ((tk-1.0f)/tkp1);
+    long n;
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3)
+    for (n = 0; n < DimTotal; n++) {
+        /* components are handled strictly one after another */
+        const float deltaP1 = P1[n] - P1_old[n];
+        R1[n] = P1[n] + momentum*deltaP1;
+        R2[n] = P2[n] + momentum*(P2[n] - P2_old[n]);
+        R3[n] = P3[n] + momentum*(P3[n] - P3_old[n]);
+    }
+    return 1;
+}
diff --git a/src/Core/regularisers_CPU/FGP_TV_core.h b/src/Core/regularisers_CPU/FGP_TV_core.h
new file mode 100644
index 0000000..3418604
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_TV_core.h
@@ -0,0 +1,63 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+/* Give the prototypes below C linkage when this header is compiled as C++. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Driver: runs the FGP-TV iterations on Input and writes the result to Output;
+ * the 2D helpers are used when dimZ <= 1, the 3D helpers otherwise. */
+CCPI_EXPORT float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+/* 2D helpers */
+/* D = A - lambda*(backward differences of R1,R2); returns D[0] */
+CCPI_EXPORT float Obj_func2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
+/* P = R + (1/(8*lambda))*(forward differences of D) */
+CCPI_EXPORT float Grad_func2D(float *P1, float *P2, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
+/* in-place projection of (P1,P2); methTV == 0 isotropic, otherwise anisotropic */
+CCPI_EXPORT float Proj_func2D(float *P1, float *P2, int methTV, long DimTotal);
+/* R = P + ((tk-1)/tkp1)*(P - P_old) */
+CCPI_EXPORT float Rupd_func2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal);
+
+/* 3D helpers: same roles as the 2D ones with a third component; the gradient
+ * step uses 1/(26*lambda) */
+CCPI_EXPORT float Obj_func3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Grad_func3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Proj_func3D(float *P1, float *P2, float *P3, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_func3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/FGP_dTV_core.c b/src/Core/regularisers_CPU/FGP_dTV_core.c
new file mode 100644
index 0000000..17b75ff
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_dTV_core.c
@@ -0,0 +1,441 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "FGP_dTV_core.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+
+/* FGP-dTV driver: builds the eta-smoothed normalised gradient field of InputRef
+ * once, then repeats the ProjectVect/Obj/Grad/Proj/Rupd steps on Input and
+ * leaves the regularised image/volume in Output (dimZ <= 1 selects the 2D path).
+ *
+ * Input, InputRef, Output - buffers of dimX*dimY (2D) or dimX*dimY*dimZ (3D) floats
+ * lambdaPar      - regularisation parameter
+ * iterationsNumb - maximum number of iterations
+ * epsil          - tolerance on ||Output - Output_prev|| / ||Output||
+ * eta            - smoothing constant added under the square root of the reference gradient magnitude
+ * methodTV       - 0 selects the isotropic projection, anything else the anisotropic one
+ * nonneg         - 1 clamps negative Output values to zero on every iteration
+ * printM         - 1 prints the iteration index at which the loop ended
+ *
+ * Returns Output[0]. Returns 0.0f without touching Output when dimX or dimY is
+ * not positive or when a work buffer cannot be allocated.
+ */
+float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll;
+    long j, DimTotal;
+    float re, re1;
+    float tk = 1.0f;
+    float tkp1 = 1.0f;
+    int count = 0;
+
+    /* an empty image has nothing to filter and no Output[0] to return */
+    if (dimX <= 0 || dimY <= 0) return 0.0f;
+
+    if (dimZ <= 1) {
+        /* 2D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL;
+        /* widen each factor first: dimX*dimY evaluated in int can overflow */
+        DimTotal = (long)dimX*(long)dimY;
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        InputRef_x = calloc(DimTotal, sizeof(float));
+        InputRef_y = calloc(DimTotal, sizeof(float));
+
+        if (Output_prev == NULL || P1 == NULL || P2 == NULL || P1_prev == NULL || P2_prev == NULL || R1 == NULL || R2 == NULL || InputRef_x == NULL || InputRef_y == NULL) {
+            fprintf(stderr, "FGP-dTV: failed to allocate work buffers\n");
+            /* free(NULL) is a no-op, so every pointer can be released unconditionally */
+            free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
+            return 0.0f;
+        }
+
+        /* calculate gradient field (smoothed) for the reference image */
+        GradNorm_func2D(InputRef, InputRef_x, InputRef_y, eta, (long)(dimX), (long)(dimY));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+
+            /* remove from (R1,R2) its component along the reference field */
+            ProjectVect_func2D(R1, R2, InputRef_x, InputRef_y, (long)(dimX), (long)(dimY));
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc2D(Input, Output, R1, R2, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /* taking a step towards minus of the gradient */
+            Grad_dfunc2D(P1, P2, Output, R1, R2, InputRef_x, InputRef_y, lambdaPar, (long)(dimX), (long)(dimY));
+
+            /* projection step */
+            Proj_dfunc2D(P1, P2, methodTV, DimTotal);
+
+            /* updating R and t */
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_dfunc2D(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, DimTotal);
+
+            /* early stopping: relative change between successive outputs */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++)
+            {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* count is cumulative: the hits need not be consecutive */
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /* storing old values */
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), 1l);
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), 1l);
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P1_prev); free(P2_prev); free(R1); free(R2); free(InputRef_x); free(InputRef_y);
+    }
+    else {
+        /* 3D case */
+        float *Output_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL;
+        /* widen each factor first: dimX*dimY*dimZ evaluated in int can overflow */
+        DimTotal = (long)dimX*(long)dimY*(long)dimZ;
+
+        Output_prev = calloc(DimTotal, sizeof(float));
+        P1 = calloc(DimTotal, sizeof(float));
+        P2 = calloc(DimTotal, sizeof(float));
+        P3 = calloc(DimTotal, sizeof(float));
+        P1_prev = calloc(DimTotal, sizeof(float));
+        P2_prev = calloc(DimTotal, sizeof(float));
+        P3_prev = calloc(DimTotal, sizeof(float));
+        R1 = calloc(DimTotal, sizeof(float));
+        R2 = calloc(DimTotal, sizeof(float));
+        R3 = calloc(DimTotal, sizeof(float));
+        InputRef_x = calloc(DimTotal, sizeof(float));
+        InputRef_y = calloc(DimTotal, sizeof(float));
+        InputRef_z = calloc(DimTotal, sizeof(float));
+
+        if (Output_prev == NULL || P1 == NULL || P2 == NULL || P3 == NULL || P1_prev == NULL || P2_prev == NULL || P3_prev == NULL || R1 == NULL || R2 == NULL || R3 == NULL || InputRef_x == NULL || InputRef_y == NULL || InputRef_z == NULL) {
+            fprintf(stderr, "FGP-dTV: failed to allocate work buffers\n");
+            free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); free(InputRef_x); free(InputRef_y); free(InputRef_z);
+            return 0.0f;
+        }
+
+        /* calculate gradient field (smoothed) for the reference volume */
+        GradNorm_func3D(InputRef, InputRef_x, InputRef_y, InputRef_z, eta, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+        /* begin iterations */
+        for(ll=0; ll<iterationsNumb; ll++) {
+
+            /* remove from (R1,R2,R3) its component along the reference field */
+            ProjectVect_func3D(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc3D(Input, Output, R1, R2, R3, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* apply nonnegativity */
+            if (nonneg == 1) for(j=0; j<DimTotal; j++) {if (Output[j] < 0.0f) Output[j] = 0.0f;}
+
+            /* taking a step towards minus of the gradient */
+            Grad_dfunc3D(P1, P2, P3, Output, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, lambdaPar, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+            /* projection step */
+            Proj_dfunc3D(P1, P2, P3, methodTV, DimTotal);
+
+            /* updating R and t */
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            Rupd_dfunc3D(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, DimTotal);
+
+            /* early stopping: relative change between successive outputs */
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++)
+            {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            re = sqrt(re)/sqrt(re1);
+            /* count is cumulative: the hits need not be consecutive */
+            if (re < epsil) count++;
+            if (count > 4) break;
+
+            /* storing old values */
+            copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P1, P1_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P2, P2_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            copyIm(P3, P3_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", ll);
+        free(Output_prev); free(P1); free(P2); free(P3); free(P1_prev); free(P2_prev); free(P3_prev); free(R1); free(R2); free(R3); free(InputRef_x); free(InputRef_y); free(InputRef_z);
+    }
+    return *Output;
+}
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+
+/* Stores in (B_x,B_y) the forward-difference gradient of B divided by
+ * sqrt(|grad|^2 + eta^2). The forward neighbour past the last column/row is
+ * taken as zero. Always returns 1. */
+float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, long dimY)
+{
+    long i;
+#pragma omp parallel for shared(B, B_x, B_y)
+    for (i = 0; i < dimX; i++) {
+        long j;
+        for (j = 0; j < dimY; j++) {
+            const long index = j*dimX + i;
+            const float nextX = (i == dimX-1) ? 0.0f : B[index + 1];
+            const float nextY = (j == dimY-1) ? 0.0f : B[index + dimX];
+            const float gradX = nextX - B[index];
+            const float gradY = nextY - B[index];
+            /* squared magnitude is rounded to float before eta is added */
+            float magn = pow(gradX,2) + pow(gradY,2);
+            magn = sqrt(magn + pow(eta,2));
+            B_x[index] = gradX/magn;
+            B_y[index] = gradY/magn;
+        }
+    }
+    return 1;
+}
+
+/* Subtracts from (R1,R2), in place and per pixel, its inner product with
+ * (B_x,B_y) times (B_x,B_y). Always returns 1. */
+float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX, long dimY)
+{
+    long i;
+#pragma omp parallel for shared(R1, R2, B_x, B_y)
+    for (i = 0; i < dimX; i++) {
+        long j;
+        for (j = 0; j < dimY; j++) {
+            const long index = j*dimX + i;
+            /* inner product is taken before either component is modified */
+            const float dot = R1[index]*B_x[index] + R2[index]*B_y[index];
+            R1[index] -= dot*B_x[index];
+            R2[index] -= dot*B_y[index];
+        }
+    }
+    return 1;
+}
+
+/* Fills D with A - lambda*(backward difference of R1 along x + backward
+ * difference of R2 along y). The neighbour that would fall before the first
+ * column/row is taken as zero. Returns D[0]. */
+float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY)
+{
+    long col;
+#pragma omp parallel for shared(A,D,R1,R2)
+    for (col = 0; col < dimX; col++) {
+        long row;   /* block-local, hence private to each thread */
+        for (row = 0; row < dimY; row++) {
+            const long index = row*dimX + col;
+            float leftR1 = 0.0f;
+            float upR2 = 0.0f;
+            if (col != 0) leftR1 = R1[index - 1];
+            if (row != 0) upR2 = R2[index - dimX];
+            D[index] = A[index] - lambda*(R1[index] + R2[index] - leftR1 - upR2);
+        }
+    }
+    return *D;
+}
+/* Takes the forward difference of D (zero on the last column/row), subtracts
+ * its inner product with (B_x,B_y) times (B_x,B_y), and writes
+ * P = R + (1/(8*lambda)) * result. Always returns 1. */
+float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, long dimX, long dimY)
+{
+    const float step = (1.0f/(8.0f*lambda));
+    long i;
+#pragma omp parallel for shared(P1,P2,D,R1,R2,B_x,B_y)
+    for (i = 0; i < dimX; i++) {
+        long j;
+        for (j = 0; j < dimY; j++) {
+            const long index = j*dimX + i;
+            float diffX = 0.0f, diffY = 0.0f, dot;
+            if (i != dimX-1) diffX = D[index] - D[index + 1];
+            if (j != dimY-1) diffY = D[index] - D[index + dimX];
+
+            /* inner product with the reference field, then remove that component */
+            dot = diffX*B_x[index] + diffY*B_y[index];
+            diffX -= dot*B_x[index];
+            diffY -= dot*B_y[index];
+
+            P1[index] = R1[index] + step*diffX;
+            P2[index] = R2[index] + step*diffY;
+        }
+    }
+    return 1;
+}
+/* In-place projection of the pair (P1,P2), element by element.
+ * methTV == 0: when P1^2 + P2^2 exceeds 1 the pair is divided by its norm.
+ * otherwise:   each component is divided by max(1, |component|).
+ * Always returns 1. */
+float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal)
+{
+    long n;
+    if (methTV != 0) {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2)
+        for (n = 0; n < DimTotal; n++) {
+            float div1 = fabs(P1[n]);
+            float div2 = fabs(P2[n]);
+            if (div1 < 1.0f) div1 = 1.0f;
+            if (div2 < 1.0f) div2 = 1.0f;
+            P1[n] = P1[n]/div1;
+            P2[n] = P2[n]/div2;
+        }
+        return 1;
+    }
+    /* isotropic TV */
+#pragma omp parallel for shared(P1,P2)
+    for (n = 0; n < DimTotal; n++) {
+        const float normSq = powf(P1[n],2) + powf(P2[n],2);
+        if (normSq > 1.0f) {
+            const float scale = 1.0f/sqrtf(normSq);
+            P1[n] = P1[n]*scale;
+            P2[n] = P2[n]*scale;
+        }
+    }
+    return 1;
+}
+/* Extrapolation step: R = P + ((tk-1)/tkp1) * (P - P_old) for both components.
+ * Always returns 1. */
+float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal)
+{
+    const float weight = ((tk-1.0f)/tkp1);
+    long n;
+#pragma omp parallel for shared(P1,P2,P1_old,P2_old,R1,R2)
+    for (n = 0; n < DimTotal; n++) {
+        const float change1 = P1[n] - P1_old[n];
+        R1[n] = P1[n] + weight*change1;
+        /* second component is read only after R1[n] has been stored */
+        R2[n] = P2[n] + weight*(P2[n] - P2_old[n]);
+    }
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* Stores in (B_x,B_y,B_z) the forward-difference gradient of B divided by
+ * sqrt(|grad|^2 + eta^2). The forward neighbour past the last index of an axis
+ * is taken as zero. Always returns 1. */
+float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, long dimX, long dimY, long dimZ)
+{
+    const long slice = dimX*dimY;   /* elements per z-slice */
+    long i;
+#pragma omp parallel for shared(B, B_x, B_y, B_z)
+    for (i = 0; i < dimX; i++) {
+        long j, k;
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const long index = slice*k + j*dimX + i;
+                const float nextX = (i == dimX-1) ? 0.0f : B[index + 1];
+                const float nextY = (j == dimY-1) ? 0.0f : B[index + dimX];
+                const float nextZ = (k == dimZ-1) ? 0.0f : B[index + slice];
+                const float gradX = nextX - B[index];
+                const float gradY = nextY - B[index];
+                const float gradZ = nextZ - B[index];
+                /* squared magnitude is rounded to float before eta is added */
+                float magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
+                magn = sqrt(magn + pow(eta,2));
+                B_x[index] = gradX/magn;
+                B_y[index] = gradY/magn;
+                B_z[index] = gradZ/magn;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Subtracts from (R1,R2,R3), in place and per voxel, its inner product with
+ * (B_x,B_y,B_z) times (B_x,B_y,B_z). Always returns 1. */
+float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, long dimX, long dimY, long dimZ)
+{
+    const long slice = dimX*dimY;   /* elements per z-slice */
+    long i;
+#pragma omp parallel for shared(R1, R2, R3, B_x, B_y, B_z)
+    for (i = 0; i < dimX; i++) {
+        long j, k;
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const long index = slice*k + j*dimX + i;
+                /* inner product is taken before any component is modified */
+                const float dot = R1[index]*B_x[index] + R2[index]*B_y[index] + R3[index]*B_z[index];
+                R1[index] -= dot*B_x[index];
+                R2[index] -= dot*B_y[index];
+                R3[index] -= dot*B_z[index];
+            }
+        }
+    }
+    return 1;
+}
+
+/* D = A - lambda*(sum of the backward differences of R1, R2, R3 along x, y, z).
+ * The neighbour that would fall before the first index of an axis is taken as
+ * zero. Returns D[0]. */
+float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ)
+{
+    const long plane = dimX*dimY;   /* elements per z-slice */
+    long i;
+#pragma omp parallel for shared(A,D,R1,R2,R3)
+    for (i = 0; i < dimX; i++) {
+        long j, k;
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const long index = plane*k + j*dimX + i;
+                float backX = 0.0f, backY = 0.0f, backZ = 0.0f;
+                if (i != 0) backX = R1[index - 1];
+                if (j != 0) backY = R2[index - dimX];
+                if (k != 0) backZ = R3[index - plane];
+                D[index] = A[index] - lambda*(R1[index] + R2[index] + R3[index] - backX - backY - backZ);
+            }
+        }
+    }
+    return *D;
+}
+/* Takes the forward difference of D (zero on the last index of each axis),
+ * subtracts its inner product with (B_x,B_y,B_z) times (B_x,B_y,B_z), and
+ * writes P = R + (1/(26*lambda)) * result. Always returns 1. */
+float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, long dimX, long dimY, long dimZ)
+{
+    const float step = (1.0f/(26.0f*lambda));
+    const long slice = dimX*dimY;   /* elements per z-slice */
+    long i;
+#pragma omp parallel for shared(P1,P2,P3,D,R1,R2,R3)
+    for (i = 0; i < dimX; i++) {
+        long j, k;
+        for (j = 0; j < dimY; j++) {
+            for (k = 0; k < dimZ; k++) {
+                const long index = slice*k + j*dimX + i;
+                float diffX = 0.0f, diffY = 0.0f, diffZ = 0.0f, dot;
+                if (i != dimX-1) diffX = D[index] - D[index + 1];
+                if (j != dimY-1) diffY = D[index] - D[index + dimX];
+                if (k != dimZ-1) diffZ = D[index] - D[index + slice];
+
+                /* inner product with the reference field, then remove that component */
+                dot = diffX*B_x[index] + diffY*B_y[index] + diffZ*B_z[index];
+                diffX -= dot*B_x[index];
+                diffY -= dot*B_y[index];
+                diffZ -= dot*B_z[index];
+
+                P1[index] = R1[index] + step*diffX;
+                P2[index] = R2[index] + step*diffY;
+                P3[index] = R3[index] + step*diffZ;
+            }
+        }
+    }
+    return 1;
+}
+/* In-place projection of the triple (P1,P2,P3), element by element.
+ * methTV == 0: when P1^2 + P2^2 + P3^2 exceeds 1 the triple is divided by its norm.
+ * otherwise:   each component is divided by max(1, |component|).
+ * Always returns 1. */
+float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal)
+{
+    float val1, val2, val3, denom, sq_denom;
+    long i;
+    if (methTV == 0) {
+        /* isotropic TV */
+        /* denom must be private: it is written by every iteration, and leaving it
+         * shared lets one thread overwrite it between another's compute and test */
+#pragma omp parallel for shared(P1,P2,P3) private(i,denom,sq_denom)
+        for(i=0; i<DimTotal; i++) {
+            denom = powf(P1[i],2) + powf(P2[i],2) + powf(P3[i],2);
+            if (denom > 1.0f) {
+                sq_denom = 1.0f/sqrtf(denom);
+                P1[i] = P1[i]*sq_denom;
+                P2[i] = P2[i]*sq_denom;
+                P3[i] = P3[i]*sq_denom;
+            }
+        }
+    }
+    else {
+        /* anisotropic TV */
+#pragma omp parallel for shared(P1,P2,P3) private(i,val1,val2,val3)
+        for(i=0; i<DimTotal; i++) {
+            val1 = fabs(P1[i]);
+            val2 = fabs(P2[i]);
+            val3 = fabs(P3[i]);
+            if (val1 < 1.0f) {val1 = 1.0f;}
+            if (val2 < 1.0f) {val2 = 1.0f;}
+            if (val3 < 1.0f) {val3 = 1.0f;}
+            P1[i] = P1[i]/val1;
+            P2[i] = P2[i]/val2;
+            P3[i] = P3[i]/val3;
+        }
+    }
+    return 1;
+}
+/* Extrapolation step for three components:
+ * R = P + ((tk-1)/tkp1) * (P - P_old). Always returns 1. */
+float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal)
+{
+    const float weight = ((tk-1.0f)/tkp1);
+    long n;
+#pragma omp parallel for shared(P1,P2,P3,P1_old,P2_old,P3_old,R1,R2,R3)
+    for (n = 0; n < DimTotal; n++) {
+        /* components are handled strictly one after another */
+        const float change1 = P1[n] - P1_old[n];
+        R1[n] = P1[n] + weight*change1;
+        R2[n] = P2[n] + weight*(P2[n] - P2_old[n]);
+        R3[n] = P3[n] + weight*(P3[n] - P3_old[n]);
+    }
+    return 1;
+}
diff --git a/src/Core/regularisers_CPU/FGP_dTV_core.h b/src/Core/regularisers_CPU/FGP_dTV_core.h
new file mode 100644
index 0000000..442dd30
--- /dev/null
+++ b/src/Core/regularisers_CPU/FGP_dTV_core.h
@@ -0,0 +1,72 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//#include <matrix.h>
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity constraint (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+
+/* C linkage so that C++ translation units can call these symbols. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Driver routine; its parameters follow the input list documented above, followed by the volume extents. */
+CCPI_EXPORT float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+/* Routines taking 2D extents (dimX, dimY) or a flat element count. */
+CCPI_EXPORT float GradNorm_func2D(float *B, float *B_x, float *B_y, float eta, long dimX, long dimY);
+CCPI_EXPORT float ProjectVect_func2D(float *R1, float *R2, float *B_x, float *B_y, long dimX, long dimY);
+CCPI_EXPORT float Obj_dfunc2D(float *A, float *D, float *R1, float *R2, float lambda, long dimX, long dimY);
+CCPI_EXPORT float Grad_dfunc2D(float *P1, float *P2, float *D, float *R1, float *R2, float *B_x, float *B_y, float lambda, long dimX, long dimY);
+CCPI_EXPORT float Proj_dfunc2D(float *P1, float *P2, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_dfunc2D(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, long DimTotal);
+
+/* Routines taking 3D extents (dimX, dimY, dimZ) or a flat element count. */
+CCPI_EXPORT float GradNorm_func3D(float *B, float *B_x, float *B_y, float *B_z, float eta, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float ProjectVect_func3D(float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Obj_dfunc3D(float *A, float *D, float *R1, float *R2, float *R3, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Grad_dfunc3D(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *B_x, float *B_y, float *B_z, float lambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Proj_dfunc3D(float *P1, float *P2, float *P3, int methTV, long DimTotal);
+CCPI_EXPORT float Rupd_dfunc3D(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, long DimTotal);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/LLT_ROF_core.c b/src/Core/regularisers_CPU/LLT_ROF_core.c
new file mode 100644
index 0000000..8416a14
--- /dev/null
+++ b/src/Core/regularisers_CPU/LLT_ROF_core.c
@@ -0,0 +1,410 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "LLT_ROF_core.h"
+#define EPS_LLT 0.01
+#define EPS_ROF 1.0e-12
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Sign of x: 1 for positive, -1 for negative, 0 otherwise. */
+int signLLT(float x) {
+    if (x > 0) return 1;
+    if (x < 0) return -1;
+    return 0;
+}
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+ *
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
+* lambdaLLT starting with smaller values.
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. tau - time-marching step
+* 5. iter - iterations number (for both models)
+*
+* Output:
+* Filtered/regularised image
+*
+* References:
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+/* Run iterationsNumb explicit update steps of the combined ROF-LLT model.
+ *
+ * Input          - noisy image/volume (read only)
+ * Output         - result buffer of the same size; initialised with a copy of Input
+ * lambdaROF      - weight of the ROF (first-order) term
+ * lambdaLLT      - weight of the LLT (second-order) term
+ * iterationsNumb - number of update steps
+ * tau            - step size of each update
+ * dimX,dimY,dimZ - extents; dimZ == 1 selects the 2D code path
+ *
+ * Returns the first element of Output. If the work buffers cannot be
+ * allocated a message is written to stderr and Output is left as the
+ * unmodified copy of Input.
+ */
+float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
+{
+    long DimTotal;
+    int ll;
+    float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL;
+
+    /* widen each extent before multiplying: the product of three ints can overflow int */
+    DimTotal = (long)(dimX)*(long)(dimY)*(long)(dimZ);
+
+    /* initialise first so Output is well defined even if allocation fails below */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), (long)(dimZ));
+
+    D1_ROF = calloc(DimTotal, sizeof(float));
+    D2_ROF = calloc(DimTotal, sizeof(float));
+    D3_ROF = calloc(DimTotal, sizeof(float));
+
+    D1_LLT = calloc(DimTotal, sizeof(float));
+    D2_LLT = calloc(DimTotal, sizeof(float));
+    D3_LLT = calloc(DimTotal, sizeof(float));
+
+    /* calloc may legitimately return NULL for a zero-sized request, hence the DimTotal guard */
+    if (DimTotal > 0 && (D1_ROF == NULL || D2_ROF == NULL || D3_ROF == NULL ||
+                         D1_LLT == NULL || D2_LLT == NULL || D3_LLT == NULL)) {
+        fprintf(stderr, "LLT_ROF_CPU_main: failed to allocate work buffers\n");
+        free(D1_LLT);free(D2_LLT);free(D3_LLT);
+        free(D1_ROF);free(D2_ROF);free(D3_ROF);
+        return *Output;
+    }
+
+    for(ll = 0; ll < iterationsNumb; ll++) {
+        if (dimZ == 1) {
+            /* 2D case */
+            /****************ROF******************/
+            /* calculate first-order differences */
+            D1_func_ROF(Output, D1_ROF, (long)(dimX), (long)(dimY), 1l);
+            D2_func_ROF(Output, D2_ROF, (long)(dimX), (long)(dimY), 1l);
+            /****************LLT******************/
+            /* estimate second-order derivatives */
+            der2D_LLT(Output, D1_LLT, D2_LLT, (long)(dimX), (long)(dimY), 1l);
+            /* Joint update for ROF and LLT models */
+            Update2D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, (long)(dimX), (long)(dimY), 1l);
+        }
+        else {
+            /* 3D case */
+            /* calculate first-order differences */
+            D1_func_ROF(Output, D1_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
+            D2_func_ROF(Output, D2_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
+            D3_func_ROF(Output, D3_ROF, (long)(dimX), (long)(dimY), (long)(dimZ));
+            /****************LLT******************/
+            /* estimate second-order derivatives */
+            der3D_LLT(Output, D1_LLT, D2_LLT, D3_LLT,(long)(dimX), (long)(dimY), (long)(dimZ));
+            /* Joint update for ROF and LLT models */
+            Update3D_LLT_ROF(Input, Output, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, (long)(dimX), (long)(dimY), (long)(dimZ));
+        }
+    } /*end of iterations*/
+    free(D1_LLT);free(D2_LLT);free(D3_LLT);
+    free(D1_ROF);free(D2_ROF);free(D3_ROF);
+    return *Output;
+}
+
+/*************************************************************************/
+/**********************LLT-related functions *****************************/
+/*************************************************************************/
+/* Normalised second-order differences of U for the LLT term (2D).
+ * D1 receives dxx/(|dxx| + EPS_LLT) and D2 receives dyy/(|dyy| + EPS_LLT),
+ * where dxx / dyy are the central second differences along i / j.
+ * A neighbour index that leaves the grid is mirrored back across the border.
+ * dimZ is not read. Returns 1.
+ */
+float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ)
+{
+    long col, row, centre, colNext, colPrev, rowNext, rowPrev;
+    float secX, secY, normX, normY;
+#pragma omp parallel for shared(U,D1,D2) private(col, row, centre, colNext, colPrev, rowNext, rowPrev, secX, secY, normX, normY)
+    for (col = 0; col < dimX; col++) {
+        for (row = 0; row < dimY; row++) {
+            centre = row*dimX + col;
+            /* mirrored neighbours at the borders */
+            colNext = (col + 1 == dimX) ? col - 1 : col + 1;
+            colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+            rowNext = (row + 1 == dimY) ? row - 1 : row + 1;
+            rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+            secX = U[row*dimX + colNext] - 2.0f*U[centre] + U[row*dimX + colPrev];
+            secY = U[rowNext*dimX + col] - 2.0f*U[centre] + U[rowPrev*dimX + col];
+
+            /* EPS_LLT keeps the quotient finite where the second difference vanishes */
+            normX = fabs(secX) + EPS_LLT;
+            normY = fabs(secY) + EPS_LLT;
+
+            D1[centre] = secX / normX;
+            D2[centre] = secY / normY;
+        }
+    }
+    return 1;
+}
+
+/* Normalised second-order differences of U for the LLT term (3D).
+ * D1, D2 and D3 receive d/(|d| + EPS_LLT) for the central second difference d
+ * taken along i, j and k respectively. A neighbour index that leaves the grid
+ * is mirrored back across the border. Returns 1.
+ */
+float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ)
+{
+    long col, row, slc, centre, colNext, colPrev, rowNext, rowPrev, slcNext, slcPrev;
+    long plane = dimX*dimY; /* elements per k-slice */
+    float secX, secY, secZ, normX, normY, normZ;
+#pragma omp parallel for shared(U,D1,D2,D3) private(col, row, slc, centre, colNext, colPrev, rowNext, rowPrev, slcNext, slcPrev, secX, secY, secZ, normX, normY, normZ)
+    for (col = 0; col < dimX; col++) {
+        for (row = 0; row < dimY; row++) {
+            for (slc = 0; slc < dimZ; slc++) {
+                /* mirrored neighbours at the borders */
+                colNext = (col + 1 == dimX) ? col - 1 : col + 1;
+                colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+                rowNext = (row + 1 == dimY) ? row - 1 : row + 1;
+                rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+                slcNext = (slc + 1 == dimZ) ? slc - 1 : slc + 1;
+                slcPrev = (slc - 1 < 0) ? slc + 1 : slc - 1;
+
+                centre = plane*slc + row*dimX + col;
+
+                secX = U[plane*slc + row*dimX + colNext] - 2.0f*U[centre] + U[plane*slc + row*dimX + colPrev];
+                secY = U[plane*slc + rowNext*dimX + col] - 2.0f*U[centre] + U[plane*slc + rowPrev*dimX + col];
+                secZ = U[plane*slcNext + row*dimX + col] - 2.0f*U[centre] + U[plane*slcPrev + row*dimX + col];
+
+                normX = fabs(secX) + EPS_LLT;
+                normY = fabs(secY) + EPS_LLT;
+                normZ = fabs(secZ) + EPS_LLT;
+
+                D1[centre] = secX / normX;
+                D2[centre] = secY / normY;
+                D3[centre] = secZ / normZ;
+            }
+        }
+    }
+    return 1;
+}
+
+/*************************************************************************/
+/**********************ROF-related functions *****************************/
+/*************************************************************************/
+
+/* First ROF difference field.
+ *
+ * For every element, D1 receives the forward difference of A along j (labelled
+ * "x+" below) divided by sqrt(x+^2 + m_y^2 [+ m_z^2] + EPS_ROF), where m_y and
+ * m_z combine the forward and backward differences along i and k as
+ * 0.5*(sign(fwd) + sign(bwd))*min(|fwd|, |bwd|).
+ *
+ * dimZ > 1 selects the 3D branch, otherwise the 2D branch is used.
+ * Neighbour indices that leave the grid are mirrored back across the border.
+ * Returns the first element of D1.
+ */
+float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+    long i,j,k,i1,i2,k1,j1,k2,index;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, k2, NOMx_1,NOMy_1,NOMy_0,NOMz_1,NOMz_0,denom1,denom2,denom3,T1)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann) */
+                    i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                    i2 = i - 1; if (i2 < 0) i2 = i+1;
+                    j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /* Forward-backward differences */
+                    NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
+                    NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
+                    NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+
+                    denom1 = NOMx_1*NOMx_1;
+                    denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    /* sqrtf, as in the 2D branch and in D2/D3, keeps the result single precision */
+                    T1 = sqrtf(denom1 + denom2 + denom3 + EPS_ROF);
+                    D1[index] = NOMx_1/T1;
+                }}}
+    }
+    else {
+#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2,NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1,index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann) */
+                i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+                i2 = i - 1; if (i2 < 0) i2 = i+1;
+                j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+                NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
+
+                denom1 = NOMx_1*NOMx_1;
+                denom2 = 0.5f*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                denom2 = denom2*denom2;
+                T1 = sqrtf(denom1 + denom2 + EPS_ROF);
+                D1[index] = NOMx_1/T1;
+            }}
+    }
+    return *D1;
+}
+/* Second ROF difference field.
+ *
+ * For every element, D2 receives the forward difference of A along i divided
+ * by sqrt(fwdI^2 + m_j^2 [+ m_k^2] + EPS_ROF), where m_j and m_k combine the
+ * forward and backward differences along j and k as
+ * 0.5*(sign(fwd) + sign(bwd))*min(|fwd|, |bwd|).
+ *
+ * dimZ > 1 selects the 3D branch, otherwise the 2D branch is used.
+ * Neighbour indices that leave the grid are mirrored back across the border.
+ * Returns the first element of D2.
+ */
+float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ)
+{
+    float fwdJ, fwdI, bwdJ, fwdK, bwdK, sqMain, mmJ, mmK, norm;
+    long col, row, slc, colNext, rowNext, rowPrev, slcNext, slcPrev, centre;
+    long plane = dimX*dimY; /* elements per k-slice */
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(centre, col, row, slc, colNext, rowNext, rowPrev, slcNext, slcPrev, fwdJ, fwdI, bwdJ, fwdK, bwdK, sqMain, mmJ, mmK, norm)
+        for (row = 0; row < dimY; row++) {
+            for (col = 0; col < dimX; col++) {
+                for (slc = 0; slc < dimZ; slc++) {
+                    centre = plane*slc + row*dimX + col;
+                    /* mirrored neighbours at the borders */
+                    colNext = (col + 1 >= dimX) ? col - 1 : col + 1;
+                    rowNext = (row + 1 >= dimY) ? row - 1 : row + 1;
+                    rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+                    slcNext = (slc + 1 >= dimZ) ? slc - 1 : slc + 1;
+                    slcPrev = (slc - 1 < 0) ? slc + 1 : slc - 1;
+
+                    /* one-sided differences around the centre element */
+                    fwdJ = A[plane*slc + rowNext*dimX + col] - A[centre];
+                    fwdI = A[plane*slc + row*dimX + colNext] - A[centre];
+                    bwdJ = A[centre] - A[plane*slc + rowPrev*dimX + col];
+                    fwdK = A[plane*slcNext + row*dimX + col] - A[centre];
+                    bwdK = A[centre] - A[plane*slcPrev + row*dimX + col];
+
+                    sqMain = fwdI*fwdI;
+                    mmJ = 0.5f*(signLLT(fwdJ) + signLLT(bwdJ))*(MIN(fabs(fwdJ),fabs(bwdJ)));
+                    mmJ = mmJ*mmJ;
+                    mmK = 0.5f*(signLLT(fwdK) + signLLT(bwdK))*(MIN(fabs(fwdK),fabs(bwdK)));
+                    mmK = mmK*mmK;
+                    norm = sqrtf(sqMain + mmJ + mmK + EPS_ROF);
+                    D2[centre] = fwdI/norm;
+                }
+            }
+        }
+    }
+    else {
+#pragma omp parallel for shared (A, D2, dimX, dimY) private(col, row, colNext, rowNext, rowPrev, fwdJ, fwdI, bwdJ, sqMain, mmJ, norm, centre)
+        for (row = 0; row < dimY; row++) {
+            for (col = 0; col < dimX; col++) {
+                centre = row*dimX + col;
+                /* mirrored neighbours at the borders */
+                colNext = (col + 1 >= dimX) ? col - 1 : col + 1;
+                rowNext = (row + 1 >= dimY) ? row - 1 : row + 1;
+                rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+                fwdJ = A[rowNext*dimX + col] - A[centre];
+                fwdI = A[row*dimX + colNext] - A[centre];
+                bwdJ = A[centre] - A[rowPrev*dimX + col];
+
+                sqMain = fwdI*fwdI;
+                mmJ = 0.5f*(signLLT(fwdJ) + signLLT(bwdJ))*(MIN(fabs(fwdJ),fabs(bwdJ)));
+                mmJ = mmJ*mmJ;
+                norm = sqrtf(sqMain + mmJ + EPS_ROF);
+                D2[centre] = fwdI/norm;
+            }
+        }
+    }
+    return *D2;
+}
+
+/* Third ROF difference field (3D volumes).
+ *
+ * For every element, D3 receives the forward difference of A along k divided
+ * by sqrt(fwdK^2 + m_j^2 + m_i^2 + EPS_ROF), where m_j and m_i combine the
+ * forward and backward differences along j and i as
+ * 0.5*(sign(fwd) + sign(bwd))*min(|fwd|, |bwd|).
+ *
+ * Neighbour indices that leave the grid are mirrored back across the border.
+ * Returns the first element of D3.
+ */
+float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ)
+{
+    float fwdJ, fwdI, bwdJ, bwdI, fwdK, sqMain, mmJ, mmI, norm;
+    long centre, col, row, slc, colNext, colPrev, rowNext, rowPrev, slcNext;
+    long plane = dimX*dimY; /* elements per k-slice */
+
+#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(centre, col, row, slc, colNext, colPrev, rowNext, rowPrev, slcNext, fwdJ, fwdI, bwdI, bwdJ, fwdK, sqMain, mmJ, mmI, norm)
+    for (row = 0; row < dimY; row++) {
+        for (col = 0; col < dimX; col++) {
+            for (slc = 0; slc < dimZ; slc++) {
+                centre = plane*slc + row*dimX + col;
+                /* mirrored neighbours at the borders */
+                colNext = (col + 1 >= dimX) ? col - 1 : col + 1;
+                colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+                rowNext = (row + 1 >= dimY) ? row - 1 : row + 1;
+                rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+                slcNext = (slc + 1 >= dimZ) ? slc - 1 : slc + 1;
+
+                /* one-sided differences around the centre element */
+                fwdJ = A[plane*slc + rowNext*dimX + col] - A[centre];
+                fwdI = A[plane*slc + row*dimX + colNext] - A[centre];
+                bwdI = A[centre] - A[plane*slc + row*dimX + colPrev];
+                bwdJ = A[centre] - A[plane*slc + rowPrev*dimX + col];
+                fwdK = A[plane*slcNext + row*dimX + col] - A[centre];
+
+                sqMain = fwdK*fwdK;
+                mmJ = 0.5f*(signLLT(fwdJ) + signLLT(bwdJ))*(MIN(fabs(fwdJ),fabs(bwdJ)));
+                mmJ = mmJ*mmJ;
+                mmI = 0.5f*(signLLT(fwdI) + signLLT(bwdI))*(MIN(fabs(fwdI),fabs(bwdI)));
+                mmI = mmI*mmI;
+                norm = sqrtf(sqMain + mmJ + mmI + EPS_ROF);
+                D3[centre] = fwdK/norm;
+            }
+        }
+    }
+    return *D3;
+}
+
+/*************************************************************************/
+/**********************ROF-LLT-related functions *************************/
+/*************************************************************************/
+
+/* One explicit update of U for the joint ROF-LLT model (2D):
+ *   U += tau*(2*lambdaROF*div - lambdaLLT*laplc - (U - U0))
+ * laplc is the sum of the central second differences of D1_LLT along i and of
+ * D2_LLT along j; div is the sum of the backward differences of D1_ROF along j
+ * and of D2_ROF along i. Neighbour indices that leave the grid are mirrored.
+ * dimZ is not read. Returns the first element of U.
+ */
+float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
+{
+    long col, row, centre, colNext, colPrev, rowPrev, rowNext;
+    float divergence, laplacian, secX, secY, backJ, backI;
+#pragma omp parallel for shared(U,U0) private(col, row, centre, colNext, colPrev, rowPrev, rowNext, laplacian, divergence, secX, secY, backJ, backI)
+    for (col = 0; col < dimX; col++) {
+        for (row = 0; row < dimY; row++) {
+            centre = row*dimX + col;
+            /* mirrored neighbours at the borders */
+            colNext = (col + 1 == dimX) ? col - 1 : col + 1;
+            colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+            rowNext = (row + 1 == dimY) ? row - 1 : row + 1;
+            rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+            /* LLT contribution */
+            secX = D1_LLT[row*dimX + colNext] - 2.0f*D1_LLT[centre] + D1_LLT[row*dimX + colPrev];
+            secY = D2_LLT[rowNext*dimX + col] - 2.0f*D2_LLT[centre] + D2_LLT[rowPrev*dimX + col];
+            laplacian = secX + secY;
+
+            /* ROF contribution */
+            backJ = D1_ROF[centre] - D1_ROF[rowPrev*dimX + col];
+            backI = D2_ROF[centre] - D2_ROF[row*dimX + colPrev];
+            divergence = backJ + backI;
+
+            /* regularisation terms plus the pull towards the data U0 */
+            U[centre] += tau*(2.0f*lambdaROF*(divergence) - lambdaLLT*(laplacian) - (U[centre] - U0[centre]));
+        }
+    }
+    return *U;
+}
+
+/* One explicit update of U for the joint ROF-LLT model (3D):
+ *   U += tau*(2*lambdaROF*div - lambdaLLT*laplc - (U - U0))
+ * laplc is the sum of the central second differences of D1_LLT, D2_LLT and
+ * D3_LLT along i, j and k; div is the sum of the backward differences of
+ * D1_ROF along j, D2_ROF along i and D3_ROF along k. Neighbour indices that
+ * leave the grid are mirrored. Returns the first element of U.
+ */
+float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ)
+{
+    long col, row, slc, colNext, colPrev, rowPrev, rowNext, slcNext, slcPrev, centre;
+    long plane = dimX*dimY; /* elements per k-slice */
+    float divergence, laplacian, secX, secY, secZ, backJ, backI, backK;
+#pragma omp parallel for shared(U,U0) private(col, row, slc, centre, colNext, colPrev, rowPrev, rowNext, slcNext, slcPrev, laplacian, divergence, secX, secY, secZ, backJ, backI, backK)
+    for (col = 0; col < dimX; col++) {
+        for (row = 0; row < dimY; row++) {
+            for (slc = 0; slc < dimZ; slc++) {
+                /* mirrored neighbours at the borders */
+                colNext = (col + 1 == dimX) ? col - 1 : col + 1;
+                colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+                rowNext = (row + 1 == dimY) ? row - 1 : row + 1;
+                rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+                slcNext = (slc + 1 == dimZ) ? slc - 1 : slc + 1;
+                slcPrev = (slc - 1 < 0) ? slc + 1 : slc - 1;
+
+                centre = plane*slc + row*dimX + col;
+
+                /* LLT contribution */
+                secX = D1_LLT[plane*slc + row*dimX + colNext] - 2.0f*D1_LLT[centre] + D1_LLT[plane*slc + row*dimX + colPrev];
+                secY = D2_LLT[plane*slc + rowNext*dimX + col] - 2.0f*D2_LLT[centre] + D2_LLT[plane*slc + rowPrev*dimX + col];
+                secZ = D3_LLT[plane*slcNext + row*dimX + col] - 2.0f*D3_LLT[centre] + D3_LLT[plane*slcPrev + row*dimX + col];
+                laplacian = secX + secY + secZ;
+
+                /* ROF contribution */
+                backJ = D1_ROF[centre] - D1_ROF[plane*slc + rowPrev*dimX + col];
+                backI = D2_ROF[centre] - D2_ROF[plane*slc + row*dimX + colPrev];
+                backK = D3_ROF[centre] - D3_ROF[plane*slcPrev + row*dimX + col];
+                divergence = backJ + backI + backK;
+
+                /* regularisation terms plus the pull towards the data U0 */
+                U[centre] += tau*(2.0f*lambdaROF*(divergence) - lambdaLLT*(laplacian) - (U[centre] - U0[centre]));
+            }
+        }
+    }
+    return *U;
+}
+
diff --git a/src/Core/regularisers_CPU/LLT_ROF_core.h b/src/Core/regularisers_CPU/LLT_ROF_core.h
new file mode 100644
index 0000000..8e6591e
--- /dev/null
+++ b/src/Core/regularisers_CPU/LLT_ROF_core.h
@@ -0,0 +1,65 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+ *
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
+* lambdaLLT starting with smaller values.
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. tau - time-marching step
+* 5. iter - iterations number (for both models)
+*
+* Output:
+* Filtered/regularised image
+*
+* References:
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+/* C linkage so that C++ translation units can call these symbols. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Driver: iterates the joint ROF-LLT update; dimZ == 1 selects the 2D path. */
+CCPI_EXPORT float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+
+/* LLT part: normalised second-order differences of U written to D1, D2 (and D3 in 3D). */
+CCPI_EXPORT float der2D_LLT(float *U, float *D1, float *D2, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float der3D_LLT(float *U, float *D1, float *D2, float *D3, long dimX, long dimY, long dimZ);
+
+/* ROF part: normalised forward differences of A, one output field per direction. */
+CCPI_EXPORT float D1_func_ROF(float *A, float *D1, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D2_func_ROF(float *A, float *D2, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D3_func_ROF(float *A, float *D3, long dimX, long dimY, long dimZ);
+
+/* Joint update of U in place from the LLT and ROF fields and the data U0. */
+CCPI_EXPORT float Update2D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float Update3D_LLT_ROF(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.c b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
new file mode 100644
index 0000000..c4c9118
--- /dev/null
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.c
@@ -0,0 +1,173 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Nonlocal_TV_core.h"
+
+/* C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indices of i neighbours
+ * 3. AR_j - indices of j neighbours
+ * 4. AR_k - indices of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights
+ * 6. regularisation parameter
+ * 7. iterations number
+
+ * Output:
+ * 1. denoised image/volume
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+
+ */
+/*****************************************************************************/
+
+/* Non-local TV regularisation driver.
+ *
+ * Copies A_orig into Output and then performs IterNumb in-place sweeps over
+ * Output, calling NLM_TV_2D (when dimZ == 0) or NLM_TV_3D (otherwise) for
+ * every element. The per-element routines receive 1/lambdaReg as their
+ * regularisation weight. H_k is only used on the 3D path.
+ * Returns the first element of Output.
+ */
+float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb)
+{
+    long i, j, k;
+    int sweep;
+    int is2D = (dimZ == 0); /* the 2D case is signalled by dimZ == 0 here */
+    long nx = (long)(dimX), ny = (long)(dimY), nz = (long)(dimZ);
+    float invLambda = 1.0f/lambdaReg;
+
+    /* start from the input; a 2D image is copied as a single slice */
+    copyIm(A_orig, Output, nx, ny, is2D ? 1l : nz);
+
+    for (sweep = 0; sweep < IterNumb; sweep++) {
+        if (is2D) {
+            /*****2D INPUT *****/
+#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, sweep) private(i,j)
+            for (i = 0; i < nx; i++) {
+                for (j = 0; j < ny; j++) {
+                    /* NLM - TV penalty */
+                    NLM_TV_2D(Output, A_orig, H_i, H_j, Weights, i, j, nx, ny, NumNeighb, invLambda);
+                }
+            }
+        }
+        else {
+            /*****3D INPUT *****/
+#pragma omp parallel for shared (A_orig, Output, Weights, H_i, H_j, H_k, sweep) private(i,j,k)
+            for (i = 0; i < nx; i++) {
+                for (j = 0; j < ny; j++) {
+                    for (k = 0; k < nz; k++) {
+                        /* NLM - TV penalty */
+                        NLM_TV_3D(Output, A_orig, H_i, H_j, H_k, Weights, i, j, k, nx, ny, nz, NumNeighb, invLambda);
+                    }
+                }
+            }
+        }
+    }
+    return *Output;
+}
+
+/***********<<<<Main Function for NLM - H1 penalty>>>>**********/
+/* Non-local H1 update of pixel (i, j) of A, in place.
+ *
+ * For each of the NumNeighb stored neighbours the entry at
+ * dimX*dimY*x + j*dimX + i of H_i / H_j / Weights gives the neighbour's
+ * coordinates and weight. The pixel becomes
+ *   (lambdaReg*A_orig + sum(w*A[neighbour])) / (lambdaReg + sum(w)).
+ * Returns the first element of A.
+ */
+float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
+{
+    long n, nbCol, nbRow, entry;
+    long centre = j*dimX+i;
+    float weightedSum = 0.0f, weightTotal = 0.0f;
+
+    for (n = 0; n < NumNeighb; n++) {
+        entry = (dimX*dimY*n) + j*dimX+i; /* n-th neighbour record of this pixel */
+        nbCol = H_i[entry];
+        nbRow = H_j[entry];
+        weightedSum += A[nbRow*dimX+nbCol]*Weights[entry];
+        weightTotal += Weights[entry];
+    }
+    A[centre] = (lambdaReg*A_orig[centre] + weightedSum)/(lambdaReg + weightTotal);
+    return *A;
+}
+/* Non-local H1 update of voxel (i, j, k) of A, in place (3D version).
+ *
+ * For each of the NumNeighb stored neighbours the entry at
+ * dimX*dimY*dimZ*x + dimX*dimY*k + j*dimX + i of H_i / H_j / H_k / Weights
+ * gives the neighbour's coordinates and weight. The voxel becomes
+ *   (lambdaReg*A_orig + sum(w*A[neighbour])) / (lambdaReg + sum(w)).
+ * Returns the first element of A.
+ */
+float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
+{
+    long n, nbCol, nbRow, nbSlc, entry;
+    long centre = (dimX*dimY*k) + j*dimX+i;
+    float weightedSum = 0.0f, weightTotal = 0.0f;
+
+    for (n = 0; n < NumNeighb; n++) {
+        entry = dimX*dimY*dimZ*n + (dimX*dimY*k) + j*dimX+i; /* n-th neighbour record of this voxel */
+        nbCol = H_i[entry];
+        nbRow = H_j[entry];
+        nbSlc = H_k[entry];
+        weightedSum += A[(dimX*dimY*nbSlc) + nbRow*dimX+nbCol]*Weights[entry];
+        weightTotal += Weights[entry];
+    }
+    A[centre] = (lambdaReg*A_orig[centre] + weightedSum)/(lambdaReg + weightTotal);
+    return *A;
+}
+
+
+/***********<<<<Main Function for NLM - TV penalty>>>>**********/
+/* Non-local TV update of pixel (i, j) of A, in place.
+ *
+ * Pass 1 accumulates the weighted squared differences between each stored
+ * neighbour and the centre pixel and takes the square root (non-local
+ * gradient magnitude). Pass 2 forms the weighted neighbour average with every
+ * weight scaled by 2/(magnitude + EPS). The pixel becomes
+ *   (lambdaReg*A_orig + sum(c*w*A[neighbour])) / (lambdaReg + sum(c*w)).
+ * Returns the first element of A.
+ */
+float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg)
+{
+    long n, nbCol, nbRow, entry;
+    long centre = j*dimX+i;
+    float weightedSum = 0.0f, weightTotal = 0.0f, gradMagn = 0.0f, scale;
+
+    /* pass 1: non-local gradient magnitude at the centre pixel */
+    for (n = 0; n < NumNeighb; n++) {
+        entry = (dimX*dimY*n) + j*dimX+i;
+        nbCol = H_i[entry];
+        nbRow = H_j[entry];
+        gradMagn += powf((A[nbRow*dimX+nbCol] - A[centre]),2)*Weights[entry];
+    }
+    gradMagn = sqrtf(gradMagn);
+    scale = 2.0f*(1.0f/(gradMagn + EPS)); /* EPS guards against division by zero */
+
+    /* pass 2: rescaled weighted average of the neighbours */
+    for (n = 0; n < NumNeighb; n++) {
+        entry = (dimX*dimY*n) + j*dimX+i;
+        nbCol = H_i[entry];
+        nbRow = H_j[entry];
+        weightedSum += A[nbRow*dimX+nbCol]*scale*Weights[entry];
+        weightTotal += Weights[entry]*scale;
+    }
+    A[centre] = (lambdaReg*A_orig[centre] + weightedSum)/(lambdaReg + weightTotal);
+    return *A;
+}
+/* Non-local TV update of voxel (i, j, k) of A, in place (3D version).
+ *
+ * Pass 1 accumulates the weighted squared differences between each stored
+ * neighbour and the centre voxel and takes the square root (non-local
+ * gradient magnitude). Pass 2 forms the weighted neighbour average with every
+ * weight scaled by 2/(magnitude + EPS). The voxel becomes
+ *   (lambdaReg*A_orig + sum(c*w*A[neighbour])) / (lambdaReg + sum(c*w)).
+ * Neighbour records live at dimX*dimY*dimZ*x + dimX*dimY*k + j*dimX + i.
+ * Returns the first element of A.
+ */
+float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg)
+{
+    long x, i1, j1, k1, index, index_m;
+    float value = 0.0f, normweight = 0.0f, NLgrad_magn = 0.0f, NLCoeff;
+
+    index_m = (dimX*dimY*k) + j*dimX+i; /* centre voxel */
+
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
+        /* the difference is taken against the centre voxel (slice k), matching
+         * the 2D routine; indexing the centre with the neighbour's slice k1
+         * would compare against an unrelated voxel */
+        NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[index_m]),2)*Weights[index];
+    }
+
+    NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */
+    NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS)); /* EPS guards against division by zero */
+
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        i1 = H_i[index];
+        j1 = H_j[index];
+        k1 = H_k[index];
+        value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index];
+        normweight += Weights[index]*NLCoeff;
+    }
+    A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight);
+    return *A;
+}
diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.h b/src/Core/regularisers_CPU/Nonlocal_TV_core.h
new file mode 100644
index 0000000..6d55101
--- /dev/null
+++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.h
@@ -0,0 +1,61 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+#define EPS 1.0000e-9
+
+/* C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes into pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indices of i neighbours
+ * 3. AR_j - indices of j neighbours
+ * 4. AR_k - indices of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights
+ * 6. regularisation parameter
+ * 7. iterations number
+ *
+ * Output:
+ * 1. denoised image/volume
+ *
+ * Reference:
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
+CCPI_EXPORT float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg);
+CCPI_EXPORT float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg);
+CCPI_EXPORT float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg);
+CCPI_EXPORT float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.c b/src/Core/regularisers_CPU/PatchSelect_core.c
new file mode 100644
index 0000000..cf5cdc7
--- /dev/null
+++ b/src/Core/regularisers_CPU/PatchSelect_core.c
@@ -0,0 +1,345 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PatchSelect_core.h"
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. AR_k - indices of k neighbours
+ * 4. Weights_ijk - associated weights
+ */
+
+/* Exchange the two floats that xp and yp point to. */
+void swap(float *xp, float *yp)
+{
+    const float first = *xp;
+    const float second = *yp;
+
+    *xp = second;
+    *yp = first;
+}
+
+/* Exchange the two unsigned shorts that xp and yp point to. */
+void swapUS(unsigned short *xp, unsigned short *yp)
+{
+    const unsigned short first = *xp;
+    const unsigned short second = *yp;
+
+    *xp = second;
+    *yp = first;
+}
+/**************************************************/
+
+/* Pre-computes, for every pixel (2D) or voxel (3D) of A, the NumNeighb most similar
+ * patches inside the search window together with their non-local weights.
+ *
+ * A            - input image/volume
+ * H_i/H_j/H_k  - output index arrays (H_k is only passed on in the 3D path)
+ * Weights      - output weights
+ * dimX, dimY   - in-plane dimensions
+ * dimZ         - 0 selects the 2D path, any other value the 3D path
+ * SearchWindow - half-size of the search window
+ * SimilarWin   - half-size of the similarity (patch) window
+ * NumNeighb    - number of neighbours stored per pixel/voxel
+ * h            - noise-related parameter; the weights use h*h
+ * switchM      - 1 selects the alternative index ordering: Indeces2D_p in 2D,
+ *                and in 3D the i and j arguments of Indeces3D are passed swapped
+ *
+ * Returns 1 on success and 0 when the kernel buffer cannot be allocated.
+ */
+float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM)
+{
+    int counterG;
+    long i, j, k;
+    long kernelWidth, kernelSize;
+    float *Eucl_Vec, h2;
+
+    h2 = h*h;
+    kernelWidth = 2*(long)(SimilarWin) + 1;
+    kernelSize = (dimZ == 0) ? kernelWidth*kernelWidth : kernelWidth*kernelWidth*kernelWidth;
+
+    Eucl_Vec = (float*) calloc (kernelSize, sizeof(float));
+    /* the kernel is written below and read by every worker thread, so bail out early */
+    if (Eucl_Vec == NULL) return 0;
+
+    if (dimZ == 0) {
+        /****************2D INPUT ***************/
+        /* generate a 2D Gaussian kernel for NLM procedure */
+        counterG = 0;
+        for(i=-SimilarWin; i<=SimilarWin; i++) {
+            for(j=-SimilarWin; j<=SimilarWin; j++) {
+                Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2*SimilarWin*SimilarWin));
+                counterG++;
+            }
+        }
+        /* for each pixel store indices of the most similar neighbours (patches) */
+        if (switchM == 1) {
+#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
+            for(i=0; i<(long)(dimX); i++) {
+                for(j=0; j<(long)(dimY); j++) {
+                    Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+                }
+            }
+        }
+        else {
+#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j)
+            for(i=0; i<(long)(dimX); i++) {
+                for(j=0; j<(long)(dimY); j++) {
+                    Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+                }
+            }
+        }
+    }
+    else {
+        /****************3D INPUT ***************/
+        /* generate a 3D Gaussian kernel for NLM procedure */
+        /* NOTE(review): the denominator is 2*SimilarWin^3 here but 2*SimilarWin^2 in the
+         * 2D kernel above - confirm that this is intended */
+        counterG = 0;
+        for(i=-SimilarWin; i<=SimilarWin; i++) {
+            for(j=-SimilarWin; j<=SimilarWin; j++) {
+                for(k=-SimilarWin; k<=SimilarWin; k++) {
+                    Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin));
+                    counterG++;
+                }
+            }
+        }
+
+        /* for each voxel store indices of the most similar neighbours (patches) */
+        if (switchM == 1) {
+#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
+            for(i=0; i<dimX; i++) {
+                for(j=0; j<dimY; j++) {
+                    for(k=0; k<dimZ; k++) {
+                        /* i and j are deliberately passed in swapped order in this mode */
+                        Indeces3D(A, H_i, H_j, H_k, Weights, j, i, (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+                    }
+                }
+            }
+        }
+        else {
+#pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k)
+            for(i=0; i<dimX; i++) {
+                for(j=0; j<dimY; j++) {
+                    for(k=0; k<dimZ; k++) {
+                        Indeces3D(A, H_i, H_j, H_k, Weights, (i), (j), (k), (dimX), (dimY), (dimZ), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2);
+                    }
+                }
+            }
+        }
+    }
+    free(Eucl_Vec);
+    return 1;
+}
+
+/* For pixel (i,j) of Aorig (addressed as Aorig[j*dimX + i]) collect every candidate
+ * inside the search window, weight it by the Gaussian-weighted squared patch
+ * difference, sort the candidates by weight [HIGH to LOW] and store the first
+ * NumNeighb of them.
+ *
+ * H_i, H_j, Weights - outputs, written at (dimX*dimY*x) + j*dimX + i for x < NumNeighb
+ * Eucl_Vec          - Gaussian kernel of the similarity window
+ * h2                - squared noise parameter used in exp(-normsum/h2)
+ *
+ * Slots for which no candidate is available are stored as index 0 / weight 0.
+ * Returns 1 on success and 0 if the temporary buffers cannot be allocated.
+ */
+float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum;
+    unsigned short *ind_i, *ind_j;
+
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
+
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    if ((Weight_Vec == NULL) || (ind_i == NULL) || (ind_j == NULL)) {
+        free(ind_i);
+        free(ind_j);
+        free(Weight_Vec);
+        return 0;
+    }
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            /* candidates outside the image are skipped */
+            if ((i1 < 0) || (i1 >= dimX) || (j1 < 0) || (j1 >= dimY)) continue;
+
+            normsum = 0.0f; counterG = 0;
+            for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                    i2 = i1 + i_c;
+                    j2 = j1 + j_c;
+                    i3 = i + i_c;
+                    j3 = j + j_c;
+                    /* only sample pairs that both lie inside the image contribute */
+                    if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY) &&
+                        (i3 >= 0) && (i3 < dimX) && (j3 >= 0) && (j3 < dimY)) {
+                        /* NOTE(review): counterG advances only for contributing pairs, so near
+                         * the border kernel entries are consumed in sequence rather than by
+                         * their (i_c,j_c) position - confirm this is intended */
+                        normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2);
+                        counterG++;
+                    }
+                }
+            }
+            /* writing temporarily into vectors; identical patches (incl. the pixel itself) are dropped */
+            if (normsum > EPS) {
+                Weight_Vec[counter] = expf(-normsum/h2);
+                ind_i[counter] = (unsigned short)(i1);
+                ind_j[counter] = (unsigned short)(j1);
+                counter++;
+            }
+        }
+    }
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb most prominent weights and store into pre-allocated arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index = (dimX*dimY*x) + j*dimX+i;
+        if (x < counter) {
+            H_i[index] = ind_i[x];
+            H_j[index] = ind_j[x];
+            Weights[index] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: pad explicitly instead of reading
+             * the temporary buffers beyond the collected entries */
+            H_i[index] = 0;
+            H_j[index] = 0;
+            Weights[index] = 0.0f;
+        }
+    }
+    free(ind_i);
+    free(ind_j);
+    free(Weight_Vec);
+    return 1;
+}
+/* Variant of Indeces2D for the transposed memory layout: the image is addressed as
+ * Aorig[i*dimY + j] and the outputs are written at (dimX*dimY*x) + i*dimY + j.
+ * For pixel (i,j) it collects every candidate inside the search window, weights it by
+ * the Gaussian-weighted squared patch difference, sorts the candidates by weight
+ * [HIGH to LOW] and stores the first NumNeighb of them.
+ *
+ * Eucl_Vec - Gaussian kernel of the similarity window
+ * h2       - squared noise parameter used in exp(-normsum/h2)
+ *
+ * Slots for which no candidate is available are stored as index 0 / weight 0.
+ * Returns 1 on success and 0 if the temporary buffers cannot be allocated.
+ */
+float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum;
+    unsigned short *ind_i, *ind_j;
+
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1);
+
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    if ((Weight_Vec == NULL) || (ind_i == NULL) || (ind_j == NULL)) {
+        free(ind_i);
+        free(ind_j);
+        free(Weight_Vec);
+        return 0;
+    }
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            /* candidates outside the image are skipped */
+            if ((i1 < 0) || (i1 >= dimX) || (j1 < 0) || (j1 >= dimY)) continue;
+
+            normsum = 0.0f; counterG = 0;
+            for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                    i2 = i1 + i_c;
+                    j2 = j1 + j_c;
+                    i3 = i + i_c;
+                    j3 = j + j_c;
+                    /* only sample pairs that both lie inside the image contribute */
+                    if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY) &&
+                        (i3 >= 0) && (i3 < dimX) && (j3 >= 0) && (j3 < dimY)) {
+                        /* NOTE(review): counterG advances only for contributing pairs, so near
+                         * the border kernel entries are consumed in sequence rather than by
+                         * their (i_c,j_c) position - confirm this is intended */
+                        normsum += Eucl_Vec[counterG]*pow(Aorig[i3*dimY + (j3)] - Aorig[i2*dimY + (j2)], 2);
+                        counterG++;
+                    }
+                }
+            }
+            /* writing temporarily into vectors; identical patches (incl. the pixel itself) are dropped */
+            if (normsum > EPS) {
+                Weight_Vec[counter] = expf(-normsum/h2);
+                ind_i[counter] = (unsigned short)(i1);
+                ind_j[counter] = (unsigned short)(j1);
+                counter++;
+            }
+        }
+    }
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+
+    /*now select the NumNeighb most prominent weights and store into pre-allocated arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index = (dimX*dimY*x) + i*dimY+j;
+        if (x < counter) {
+            H_i[index] = ind_i[x];
+            H_j[index] = ind_j[x];
+            Weights[index] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: pad explicitly instead of reading
+             * the temporary buffers beyond the collected entries */
+            H_i[index] = 0;
+            H_j[index] = 0;
+            Weights[index] = 0.0f;
+        }
+    }
+    free(ind_i);
+    free(ind_j);
+    free(Weight_Vec);
+    return 1;
+}
+
+/* 3D counterpart of Indeces2D: for voxel (i,j,k) collect every candidate inside the
+ * cubic search window, weight it by the Gaussian-weighted squared patch difference,
+ * sort the candidates by weight [HIGH to LOW] and store the first NumNeighb of them.
+ *
+ * Note the parameter order: dimY comes BEFORE dimX in this signature.
+ * The volume is addressed as Aorig[(dimX*dimY*k) + j*dimX + i]; outputs are written at
+ * dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX + i for x < NumNeighb.
+ *
+ * Eucl_Vec - Gaussian kernel of the similarity window
+ * h2       - squared noise parameter used in exp(-normsum/h2)
+ *
+ * Slots for which no candidate is available are stored as index 0 / weight 0.
+ * Returns 1 on success and 0 if the temporary buffers cannot be allocated.
+ */
+float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2)
+{
+    long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG;
+    float *Weight_Vec, normsum;
+    unsigned short *ind_i, *ind_j, *ind_k;
+
+    sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1);
+
+    Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float));
+    ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short));
+    if ((Weight_Vec == NULL) || (ind_i == NULL) || (ind_j == NULL) || (ind_k == NULL)) {
+        free(ind_i);
+        free(ind_j);
+        free(ind_k);
+        free(Weight_Vec);
+        return 0;
+    }
+
+    counter = 0l;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            for(k_m=-SearchWindow; k_m<=SearchWindow; k_m++) {
+                k1 = k+k_m;
+                i1 = i+i_m;
+                j1 = j+j_m;
+                /* candidates outside the volume are skipped */
+                if ((i1 < 0) || (i1 >= dimX) || (j1 < 0) || (j1 >= dimY) || (k1 < 0) || (k1 >= dimZ)) continue;
+
+                normsum = 0.0f; counterG = 0l;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        for(k_c=-SimilarWin; k_c<=SimilarWin; k_c++) {
+                            i2 = i1 + i_c;
+                            j2 = j1 + j_c;
+                            k2 = k1 + k_c;
+                            i3 = i + i_c;
+                            j3 = j + j_c;
+                            k3 = k + k_c;
+                            /* only sample pairs that both lie inside the volume contribute */
+                            if ((i2 >= 0) && (i2 < dimX) && (j2 >= 0) && (j2 < dimY) && (k2 >= 0) && (k2 < dimZ) &&
+                                (i3 >= 0) && (i3 < dimX) && (j3 >= 0) && (j3 < dimY) && (k3 >= 0) && (k3 < dimZ)) {
+                                /* NOTE(review): counterG advances only for contributing pairs, so near
+                                 * the border kernel entries are consumed in sequence rather than by
+                                 * their (i_c,j_c,k_c) position - confirm this is intended */
+                                normsum += Eucl_Vec[counterG]*pow(Aorig[(dimX*dimY*k3) + j3*dimX + (i3)] - Aorig[(dimX*dimY*k2) + j2*dimX + (i2)], 2);
+                                counterG++;
+                            }
+                        }
+                    }
+                }
+                /* writing temporarily into vectors; identical patches (incl. the voxel itself) are dropped */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = (unsigned short)(i1);
+                    ind_j[counter] = (unsigned short)(j1);
+                    ind_k[counter] = (unsigned short)(k1);
+                    counter++;
+                }
+            }
+        }
+    }
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly. Same adjacent-swap bubble sort as the 2D */
+    /* routines: the previous loop compared [y] with [x] but swapped [y] with [y+1] */
+    /* (touching slot `counter`), which does not yield a sorted order */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+                swapUS(&ind_k[y], &ind_k[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+
+    /*now select the NumNeighb most prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i;
+        if (x < counter) {
+            H_i[index] = ind_i[x];
+            H_j[index] = ind_j[x];
+            H_k[index] = ind_k[x];
+            Weights[index] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: pad explicitly instead of reading
+             * the temporary buffers beyond the collected entries */
+            H_i[index] = 0;
+            H_j[index] = 0;
+            H_k[index] = 0;
+            Weights[index] = 0.0f;
+        }
+    }
+
+    free(ind_i);
+    free(ind_j);
+    free(ind_k);
+    free(Weight_Vec);
+    return 1;
+}
+
diff --git a/src/Core/regularisers_CPU/PatchSelect_core.h b/src/Core/regularisers_CPU/PatchSelect_core.h
new file mode 100644
index 0000000..ddaa428
--- /dev/null
+++ b/src/Core/regularisers_CPU/PatchSelect_core.h
@@ -0,0 +1,63 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+#define EPS 1.0000e-12
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. AR_k - indices of k neighbours
+ * 4. Weights_ijk - associated weights
+ */
+/*****************************************************************************/
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
+CCPI_EXPORT float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
+CCPI_EXPORT float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
+CCPI_EXPORT float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c
new file mode 100644
index 0000000..1858442
--- /dev/null
+++ b/src/Core/regularisers_CPU/ROF_TV_core.c
@@ -0,0 +1,289 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ROF_TV_core.h"
+
+#define EPS 1.0e-12
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* sign function: +1 for positive x, -1 for negative x, 0 otherwise */
+int sign(float x) {
+    if (x > 0) {
+        return 1;
+    }
+    if (x < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+
+/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+ *
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ */
+
+/* Running iterations of TV-ROF function.
+ *
+ * Input          - noisy image/volume (read only)
+ * Output         - result buffer; initialised with a copy of Input, then updated in place
+ * lambdaPar      - regularisation parameter
+ * iterationsNumb - number of explicit iterations
+ * tau            - marching step of the explicit scheme
+ * dimX,dimY,dimZ - dimensions; dimZ <= 1 is processed as a single 2D slice
+ *
+ * Returns Output[0]; returns 0 without touching Output when the sizes are not positive.
+ * If the work buffers cannot be allocated a message is written to stderr and Output
+ * is left as a plain copy of Input.
+ */
+float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ)
+{
+    float *D1 = NULL, *D2 = NULL, *D3 = NULL;
+    int i;
+    long DimTotal, nz;
+
+    /* the helpers take the 2D path for dimZ <= 1, so size the buffers for one slice then */
+    nz = (dimZ > 1) ? (long)(dimZ) : 1;
+    /* widen each factor before multiplying: the product used to be formed in int,
+     * which overflows for large volumes wherever long is wider than int */
+    DimTotal = (long)(dimX)*(long)(dimY)*nz;
+    if (DimTotal <= 0) return 0.0f;
+
+    /* copy into output */
+    copyIm(Input, Output, (long)(dimX), (long)(dimY), nz);
+
+    D1 = calloc(DimTotal, sizeof(float));
+    D2 = calloc(DimTotal, sizeof(float));
+    /* D3 is only read and written by the 3D code paths */
+    if (nz > 1) D3 = calloc(DimTotal, sizeof(float));
+    if ((D1 == NULL) || (D2 == NULL) || ((nz > 1) && (D3 == NULL))) {
+        fprintf(stderr, "TV_ROF_CPU_main: out of memory, returning the unregularised input\n");
+        free(D1); free(D2); free(D3);
+        return *Output;
+    }
+
+    /* start TV iterations */
+    for(i=0; i < iterationsNumb; i++) {
+        /* calculate differences */
+        D1_func(Output, D1, (long)(dimX), (long)(dimY), nz);
+        D2_func(Output, D2, (long)(dimX), (long)(dimY), nz);
+        if (nz > 1) D3_func(Output, D3, (long)(dimX), (long)(dimY), nz);
+        TV_kernel(D1, D2, D3, Output, Input, lambdaPar, tau, (long)(dimX), (long)(dimY), nz);
+    }
+    free(D1); free(D2); free(D3);
+    return *Output;
+}
+
+/* calculate differences 1
+ * Fills D1 with the forward difference of A along j (labelled "x" in this file),
+ * divided by sqrt(forward_x^2 + minmod_y^2 [+ minmod_z^2] + EPS), where minmod_* is
+ * 0.5*(sign(f)+sign(b))*min(|f|,|b|) of the forward (f) and backward (b) differences
+ * along the other axes. dimZ > 1 selects the 3D stencil, otherwise the 2D one is used.
+ * Returns D1[0].
+ */
+float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T1;
+    long i, j, k, i1, i2, j1, k1, k2, index;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D1, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, k2, NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T1)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                     * neighbour to mirror onto, so the sample is its own neighbour */
+                    i1 = i + 1; if (i1 >= dimX) i1 = (dimX > 1) ? i-1 : i;
+                    i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                    j1 = j + 1; if (j1 >= dimY) j1 = (dimY > 1) ? j-1 : j;
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /* Forward-backward differences */
+                    NOMx_1 = A[(dimX*dimY)*k + j1*dimX + i] - A[index]; /* x+ */
+                    NOMy_1 = A[(dimX*dimY)*k + j*dimX + i1] - A[index]; /* y+ */
+                    NOMy_0 = A[index] - A[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+
+                    denom1 = NOMx_1*NOMx_1;
+                    denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    /* sqrtf, as in the 2D branch and in D2_func/D3_func */
+                    T1 = sqrtf(denom1 + denom2 + denom3 + EPS);
+                    D1[index] = NOMx_1/T1;
+                }
+            }
+        }
+    }
+    else {
+#pragma omp parallel for shared (A, D1, dimX, dimY) private(i, j, i1, j1, i2, NOMx_1, NOMy_1, NOMy_0, denom1, denom2, T1, index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                 * neighbour to mirror onto, so the sample is its own neighbour */
+                i1 = i + 1; if (i1 >= dimX) i1 = (dimX > 1) ? i-1 : i;
+                i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j1 = j + 1; if (j1 >= dimY) j1 = (dimY > 1) ? j-1 : j;
+
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+                NOMy_0 = A[index] - A[(j)*dimX + i2]; /* y- */
+
+                denom1 = NOMx_1*NOMx_1;
+                denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                denom2 = denom2*denom2;
+                T1 = sqrtf(denom1 + denom2 + EPS);
+                D1[index] = NOMx_1/T1;
+            }
+        }
+    }
+    return *D1;
+}
+/* calculate differences 2
+ * Fills D2 with the forward difference of A along i (labelled "y" in this file),
+ * divided by sqrt(forward_y^2 + minmod_x^2 [+ minmod_z^2] + EPS), where minmod_* is
+ * 0.5*(sign(f)+sign(b))*min(|f|,|b|) of the forward (f) and backward (b) differences
+ * along the other axes. dimZ > 1 selects the 3D stencil, otherwise the 2D one is used.
+ * Returns D2[0].
+ */
+float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+    long i, j, k, i1, j1, j2, k1, k2, index;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (A, D2, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, j2, k2, NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                     * neighbour to mirror onto, so the sample is its own neighbour */
+                    i1 = i + 1; if (i1 >= dimX) i1 = (dimX > 1) ? i-1 : i;
+                    j1 = j + 1; if (j1 >= dimY) j1 = (dimY > 1) ? j-1 : j;
+                    j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                    k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /* Forward-backward differences */
+                    NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
+                    NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
+                    NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+                    NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+                    NOMz_0 = A[index] - A[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+
+                    denom1 = NOMy_1*NOMy_1;
+                    denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+                    denom2 = denom2*denom2;
+                    denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(fabs(NOMz_1),fabs(NOMz_0)));
+                    denom3 = denom3*denom3;
+                    T2 = sqrtf(denom1 + denom2 + denom3 + EPS);
+                    D2[index] = NOMy_1/T2;
+                }
+            }
+        }
+    }
+    else {
+#pragma omp parallel for shared (A, D2, dimX, dimY) private(i, j, i1, j1, j2, NOMx_1, NOMy_1, NOMx_0, denom1, denom2, T2, index)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                 * neighbour to mirror onto, so the sample is its own neighbour */
+                i1 = i + 1; if (i1 >= dimX) i1 = (dimX > 1) ? i-1 : i;
+                j1 = j + 1; if (j1 >= dimY) j1 = (dimY > 1) ? j-1 : j;
+                j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+
+                /* Forward-backward differences */
+                NOMx_1 = A[j1*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[j*dimX + i1] - A[index]; /* y+ */
+                NOMx_0 = A[index] - A[j2*dimX + i]; /* x- */
+
+                denom1 = NOMy_1*NOMy_1;
+                denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+                denom2 = denom2*denom2;
+                T2 = sqrtf(denom1 + denom2 + EPS);
+                D2[index] = NOMy_1/T2;
+            }
+        }
+    }
+    return *D2;
+}
+
+/* calculate differences 3
+ * Fills D3 with the forward difference of A along k ("z"), divided by
+ * sqrt(forward_z^2 + minmod_x^2 + minmod_y^2 + EPS), where minmod_* is
+ * 0.5*(sign(f)+sign(b))*min(|f|,|b|) of the forward (f) and backward (b) differences
+ * along the in-plane axes. There is no 2D branch: the 3D stencil is always used.
+ * Returns D3[0].
+ */
+float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ)
+{
+    float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+    long index, i, j, k, i1, i2, j1, j2, k1;
+
+#pragma omp parallel for shared (A, D3, dimX, dimY, dimZ) private(index, i, j, k, i1, j1, k1, i2, j2, NOMx_1, NOMy_1, NOMy_0, NOMx_0, NOMz_1, denom1, denom2, denom3, T3)
+    for(j=0; j<dimY; j++) {
+        for(i=0; i<dimX; i++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                 * neighbour to mirror onto, so the sample is its own neighbour */
+                i1 = i + 1; if (i1 >= dimX) i1 = (dimX > 1) ? i-1 : i;
+                i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j1 = j + 1; if (j1 >= dimY) j1 = (dimY > 1) ? j-1 : j;
+                j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                k1 = k + 1; if (k1 >= dimZ) k1 = (dimZ > 1) ? k-1 : k;
+
+                /* Forward-backward differences */
+                NOMx_1 = A[(dimX*dimY)*k + (j1)*dimX + i] - A[index]; /* x+ */
+                NOMy_1 = A[(dimX*dimY)*k + (j)*dimX + i1] - A[index]; /* y+ */
+                NOMy_0 = A[index] - A[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+                NOMx_0 = A[index] - A[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+                NOMz_1 = A[(dimX*dimY)*k1 + j*dimX + i] - A[index]; /* z+ */
+
+                denom1 = NOMz_1*NOMz_1;
+                denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(fabs(NOMx_1),fabs(NOMx_0)));
+                denom2 = denom2*denom2;
+                denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(fabs(NOMy_1),fabs(NOMy_0)));
+                denom3 = denom3*denom3;
+                T3 = sqrtf(denom1 + denom2 + denom3 + EPS);
+                D3[index] = NOMz_1/T3;
+            }
+        }
+    }
+    return *D3;
+}
+
+/* calculate divergence
+ * Explicit update of B in place:
+ *     B += tau*(2*lambda*(dv1 + dv2 [+ dv3]) - (B - A))
+ * where dvN is the backward difference of DN (D1 along j, D2 along i, D3 along k).
+ * dimZ > 1 selects the 3D branch; otherwise the 2D branch is used and D3 is not accessed.
+ * Returns B[0].
+ */
+float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, long dimX, long dimY, long dimZ)
+{
+    float dv1, dv2, dv3;
+    long index, i, j, k, i2, j2, k2;
+
+    if (dimZ > 1) {
+#pragma omp parallel for shared (D1, D2, D3, B, dimX, dimY, dimZ) private(index, i, j, k, i2, j2, k2, dv1, dv2, dv3)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                for(k=0; k<dimZ; k++) {
+                    index = (dimX*dimY)*k + j*dimX+i;
+                    /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                     * neighbour to mirror onto, so the sample is its own neighbour */
+                    i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                    j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+                    k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+                    /*divergence components */
+                    dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
+                    dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
+                    dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
+
+                    B[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (B[index] - A[index]));
+                }
+            }
+        }
+    }
+    else {
+#pragma omp parallel for shared (D1, D2, B, dimX, dimY) private(index, i, j, i2, j2, dv1, dv2)
+        for(j=0; j<dimY; j++) {
+            for(i=0; i<dimX; i++) {
+                index = j*dimX+i;
+                /* symmetric boundary conditions (Neumann); an axis of size 1 has no
+                 * neighbour to mirror onto, so the sample is its own neighbour */
+                i2 = i - 1; if (i2 < 0) i2 = (dimX > 1) ? i+1 : i;
+                j2 = j - 1; if (j2 < 0) j2 = (dimY > 1) ? j+1 : j;
+
+                /* divergence components */
+                dv1 = D1[index] - D1[j2*dimX + i];
+                dv2 = D2[index] - D2[j*dimX + i2];
+
+                B[index] += tau*(2.0f*lambda*(dv1 + dv2) - (B[index] - A[index]));
+            }
+        }
+    }
+    return *B;
+}
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.h b/src/Core/regularisers_CPU/ROF_TV_core.h
new file mode 100644
index 0000000..4e320e9
--- /dev/null
+++ b/src/Core/regularisers_CPU/ROF_TV_core.h
@@ -0,0 +1,57 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+ *
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+
+CCPI_EXPORT float TV_kernel(float *D1, float *D2, float *D3, float *B, float *A, float lambda, float tau, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D1_func(float *A, float *D1, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D2_func(float *A, float *D2, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float D3_func(float *A, float *D3, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/src/Core/regularisers_CPU/SB_TV_core.c b/src/Core/regularisers_CPU/SB_TV_core.c
new file mode 100755
index 0000000..769ea67
--- /dev/null
+++ b/src/Core/regularisers_CPU/SB_TV_core.c
@@ -0,0 +1,368 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "SB_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' (methodTV = 1 selects 'l1', any other value 'iso') [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+/* Driver for Split Bregman TV denoising; runs the 2D kernels when dimZ <= 1
+ * and the 3D kernels otherwise.
+ *
+ * Input    - noisy image/volume (read only)
+ * Output   - receives the regularised result; initialised with a copy of Input
+ * mu       - regularisation parameter; used internally as mu = 1/mu, lambda = 2*mu
+ * iter     - maximum number of outer iterations
+ * epsil    - early-stopping tolerance, 0 switches the check off
+ * methodTV - 1 selects anisotropic shrinkage, any other value isotropic
+ * printM   - 1 prints the iteration index at which the loop ended
+ * dimX, dimY, dimZ - array extents
+ *
+ * Returns Output[0], or 0 for an empty array. If the work buffers cannot be
+ * allocated, a message is written to stderr and Output stays a copy of Input.
+ */
+float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+{
+    int ll;
+    long j, DimTotal;
+    float re, re1, lambda;
+    int count = 0;
+    float *Output_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
+    const int is3D = (dimZ > 1);
+    const long nX = (long)(dimX), nY = (long)(dimY);
+    const long nZ = is3D ? (long)(dimZ) : 1l;
+
+    mu = 1.0f/mu;
+    lambda = 2.0f*mu;
+
+    /* operands are widened to long first, so the product is not formed in int */
+    DimTotal = nX*nY*nZ;
+    if (DimTotal <= 0) return 0.0f; /* nothing to process and Output[0] may not exist */
+
+    copyIm(Input, Output, nX, nY, nZ); /*initialize */
+
+    Output_prev = calloc(DimTotal, sizeof(float));
+    Dx = calloc(DimTotal, sizeof(float));
+    Dy = calloc(DimTotal, sizeof(float));
+    Bx = calloc(DimTotal, sizeof(float));
+    By = calloc(DimTotal, sizeof(float));
+    if (is3D) {
+        Dz = calloc(DimTotal, sizeof(float));
+        Bz = calloc(DimTotal, sizeof(float));
+    }
+
+    if (Output_prev == NULL || Dx == NULL || Dy == NULL || Bx == NULL || By == NULL || (is3D && (Dz == NULL || Bz == NULL))) {
+        fprintf(stderr, "SB-TV: unable to allocate working memory\n");
+        free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz);
+        return *Output;
+    }
+
+    /* begin outer SB iterations */
+    for(ll=0; ll<iter; ll++) {
+
+        /* storing old estimate */
+        copyIm(Output, Output_prev, nX, nY, nZ);
+
+        if (is3D) {
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, nX, nY, nZ, lambda, mu);
+            copyIm(Output, Output_prev, nX, nY, nZ);
+            gauss_seidel3D(Output, Input, Output_prev, Dx, Dy, Dz, Bx, By, Bz, nX, nY, nZ, lambda, mu);
+
+            /* TV-related step */
+            if (methodTV == 1) updDxDyDz_shrinkAniso3D(Output, Dx, Dy, Dz, Bx, By, Bz, nX, nY, nZ, lambda);
+            else updDxDyDz_shrinkIso3D(Output, Dx, Dy, Dz, Bx, By, Bz, nX, nY, nZ, lambda);
+
+            /* update for Bregman variables */
+            updBxByBz3D(Output, Dx, Dy, Dz, Bx, By, Bz, nX, nY, nZ);
+        }
+        else {
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, nX, nY, lambda, mu);
+            copyIm(Output, Output_prev, nX, nY, 1l);
+            gauss_seidel2D(Output, Input, Output_prev, Dx, Dy, Bx, By, nX, nY, lambda, mu);
+
+            /* TV-related step */
+            if (methodTV == 1) updDxDy_shrinkAniso2D(Output, Dx, Dy, Bx, By, nX, nY, lambda);
+            else updDxDy_shrinkIso2D(Output, Dx, Dy, Bx, By, nX, nY, lambda);
+
+            /* update for Bregman variables */
+            updBxBy2D(Output, Dx, Dy, Bx, By, nX, nY);
+        }
+
+        /* check early stopping criteria if epsilon not equal zero;
+           Output_prev holds the estimate after the first of the two sweeps */
+        if (epsil != 0) {
+            re = 0.0f; re1 = 0.0f;
+            for(j=0; j<DimTotal; j++) {
+                re += pow(Output[j] - Output_prev[j],2);
+                re1 += pow(Output[j],2);
+            }
+            /* an all-zero estimate would give 0/0 = NaN and the test below could
+               never fire; fall back to the absolute change in that case */
+            if (re1 > 0.0f) re = sqrt(re)/sqrt(re1);
+            else re = sqrt(re);
+            if (re < epsil) count++;
+            if (count > 4) break;
+        }
+    }
+    if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+    free(Output_prev); free(Dx); free(Dy); free(Dz); free(Bx); free(By); free(Bz);
+    return *Output;
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* One relaxation sweep for the u-subproblem of Split Bregman TV (2D).
+ *
+ * U          - receives the updated estimate
+ * A          - data term (noisy input)
+ * U_prev     - estimate from the previous sweep; every neighbour value is read
+ *              from it, so the result does not depend on traversal order
+ * Dx, Dy     - split variables
+ * Bx, By     - Bregman variables
+ * lambda, mu - weights of the coupling and data terms
+ *
+ * Pixels are addressed as index = j*dimX + i. Returns U[0].
+ */
+float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda, float mu)
+{
+    float sum, normConst;
+    long i,j,i1,i2,j1,j2,index;
+    normConst = 1.0f/(mu + 4.0f*lambda);
+
+#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,sum)
+    for(i=0; i<dimX; i++) {
+        /* symmetric boundary conditions (Neumann); an axis of length 1 has no
+           neighbour to mirror, so the pixel itself is used instead of index -1/+1 */
+        i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+        i2 = i-1; if (i2 < 0) i2 = (i+1 < dimX) ? i+1 : i;
+        for(j=0; j<dimY; j++) {
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            j2 = j-1; if (j2 < 0) j2 = (j+1 < dimY) ? j+1 : j;
+            index = j*dimX+i;
+
+            sum = Dx[j*dimX+i2] - Dx[index] + Dy[j2*dimX+i] - Dy[index] - Bx[j*dimX+i2] + Bx[index] - By[j2*dimX+i] + By[index];
+            sum += U_prev[j*dimX+i1] + U_prev[j*dimX+i2] + U_prev[j1*dimX+i] + U_prev[j2*dimX+i];
+            sum *= lambda;
+            sum += mu*A[index];
+            U[index] = normConst*sum;
+        }
+    }
+    return *U;
+}
+
+/* Anisotropic (component-wise) soft-thresholding of the split variables (2D).
+ *
+ * For every pixel the forward differences of U plus the Bregman variables are
+ * shrunk towards zero by 1/lambda, independently in x and y, and stored in
+ * Dx and Dy. U, Bx and By are only read. Always returns 1.
+ */
+float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda)
+{
+    long i,j,i1,j1,index;
+    float val1, val11, val2, val22, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,val22)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); on an axis of length 1 the
+               pixel itself stands in for the neighbour to avoid index -1 */
+            i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            index = j*dimX+i;
+
+            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*dimX+i] - U[index]) + By[index];
+
+            /* magnitude after shrinkage, clipped at zero */
+            val11 = fabs(val1) - denom_lam; if (val11 < 0) val11 = 0;
+            val22 = fabs(val2) - denom_lam; if (val22 < 0) val22 = 0;
+
+            /* restore the sign; a zero input stays zero (avoids 0/0) */
+            if (val1 !=0) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0;
+            if (val2 !=0) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0;
+        }
+    }
+    return 1;
+}
+/* Isotropic (vector) soft-thresholding of the split variables (2D).
+ *
+ * The vector (forward difference of U + Bregman variable) is shortened by
+ * 1/lambda along its own direction and written to (Dx, Dy); vectors shorter
+ * than 1/lambda become zero. U, Bx and By are only read. Always returns 1.
+ */
+float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda)
+{
+    long i,j,i1,j1,index;
+    float val1, val11, val2, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,val1,val11,val2,denom)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); on an axis of length 1 the
+               pixel itself stands in for the neighbour to avoid index -1 */
+            i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            index = j*dimX+i;
+
+            val1 = (U[j*dimX+i1] - U[index]) + Bx[index];
+            val2 = (U[j1*dimX+i] - U[index]) + By[index];
+
+            denom = sqrt(val1*val1 + val2*val2);
+
+            /* shrunk vector length, clipped at zero */
+            val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+
+            if (denom != 0.0f) {
+                Dx[index] = val11*(val1/denom);
+                Dy[index] = val11*(val2/denom);
+            }
+            else {
+                /* zero vector has no direction; avoid 0/0 */
+                Dx[index] = 0;
+                Dy[index] = 0;
+            }
+        }
+    }
+    return 1;
+}
+/* Bregman variable update (2D): adds (forward difference of U - D) to B for
+ * both directions. U, Dx and Dy are only read. Always returns 1.
+ */
+float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY)
+{
+    long i,j,i1,j1,index;
+#pragma omp parallel for shared(U) private(index,i,j,i1,j1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            /* symmetric boundary conditions (Neumann); on an axis of length 1 the
+               pixel itself stands in for the neighbour to avoid index -1 */
+            i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+            j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+            index = j*dimX+i;
+
+            Bx[index] += (U[j*dimX+i1] - U[index]) - Dx[index];
+            By[index] += (U[j1*dimX+i] - U[index]) - Dy[index];
+        }
+    }
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/*****************************************************************/
+/* One relaxation sweep for the u-subproblem of Split Bregman TV (3D).
+ *
+ * U            - receives the updated estimate
+ * A            - data term (noisy input)
+ * U_prev       - estimate from the previous sweep; every neighbour value is
+ *                read from it, so the result does not depend on traversal order
+ * Dx, Dy, Dz   - split variables
+ * Bx, By, Bz   - Bregman variables
+ * lambda, mu   - weights of the coupling and data terms
+ *
+ * Voxels are addressed as index = (dimX*dimY)*k + j*dimX + i. Returns U[0].
+ */
+float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda, float mu)
+{
+    float normConst, d_val, b_val, sum;
+    long i,j,i1,i2,j1,j2,k,k1,k2,index;
+    normConst = 1.0f/(mu + 6.0f*lambda);
+#pragma omp parallel for shared(U) private(index,i,j,i1,i2,j1,j2,k,k1,k2,d_val,b_val,sum)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                /* symmetric boundary conditions (Neumann); an axis of length 1 has no
+                   neighbour to mirror, so the voxel itself is used instead of index -1/+1 */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                i2 = i-1; if (i2 < 0) i2 = (i+1 < dimX) ? i+1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                j2 = j-1; if (j2 < 0) j2 = (j+1 < dimY) ? j+1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+                k2 = k-1; if (k2 < 0) k2 = (k+1 < dimZ) ? k+1 : k;
+                index = (dimX*dimY)*k + j*dimX+i;
+
+                d_val = Dx[(dimX*dimY)*k + j*dimX+i2] - Dx[index] + Dy[(dimX*dimY)*k + j2*dimX+i] - Dy[index] + Dz[(dimX*dimY)*k2 + j*dimX+i] - Dz[index];
+                b_val = -Bx[(dimX*dimY)*k + j*dimX+i2] + Bx[index] - By[(dimX*dimY)*k + j2*dimX+i] + By[index] - Bz[(dimX*dimY)*k2 + j*dimX+i] + Bz[index];
+                sum = d_val + b_val;
+                sum += U_prev[(dimX*dimY)*k + j*dimX+i1] + U_prev[(dimX*dimY)*k + j*dimX+i2] + U_prev[(dimX*dimY)*k + j1*dimX+i] + U_prev[(dimX*dimY)*k + j2*dimX+i] + U_prev[(dimX*dimY)*k1 + j*dimX+i] + U_prev[(dimX*dimY)*k2 + j*dimX+i];
+                sum *= lambda;
+                sum += mu*A[index];
+                U[index] = normConst*sum;
+            }
+        }
+    }
+    return *U;
+}
+
+/* Anisotropic (component-wise) soft-thresholding of the split variables (3D).
+ *
+ * For every voxel the forward differences of U plus the Bregman variables are
+ * shrunk towards zero by 1/lambda, independently in x, y and z, and stored in
+ * Dx, Dy and Dz. U, Bx, By and Bz are only read. Always returns 1.
+ */
+float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda)
+{
+    long i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val22, val3, val33, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,i,j,i1,j1,k,k1,val1,val11,val2,val22,val3,val33)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); on an axis of length 1 the
+                   voxel itself stands in for the neighbour to avoid index -1 */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+
+                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
+
+                /* magnitude after shrinkage, clipped at zero */
+                val11 = fabs(val1) - denom_lam; if (val11 < 0.0f) val11 = 0.0f;
+                val22 = fabs(val2) - denom_lam; if (val22 < 0.0f) val22 = 0.0f;
+                val33 = fabs(val3) - denom_lam; if (val33 < 0.0f) val33 = 0.0f;
+
+                /* restore the sign; a zero input stays zero (avoids 0/0) */
+                if (val1 !=0.0f) Dx[index] = (val1/fabs(val1))*val11; else Dx[index] = 0.0f;
+                if (val2 !=0.0f) Dy[index] = (val2/fabs(val2))*val22; else Dy[index] = 0.0f;
+                if (val3 !=0.0f) Dz[index] = (val3/fabs(val3))*val33; else Dz[index] = 0.0f;
+            }
+        }
+    }
+    return 1;
+}
+/* Isotropic (vector) soft-thresholding of the split variables (3D).
+ *
+ * The vector (forward difference of U + Bregman variable) is shortened by
+ * 1/lambda along its own direction and written to (Dx, Dy, Dz); vectors
+ * shorter than 1/lambda become zero. U, Bx, By and Bz are only read.
+ * Always returns 1.
+ */
+float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda)
+{
+    long i,j,i1,j1,k,k1,index;
+    float val1, val11, val2, val3, denom, denom_lam;
+    denom_lam = 1.0f/lambda;
+#pragma omp parallel for shared(U,denom_lam) private(index,denom,i,j,i1,j1,k,k1,val1,val11,val2,val3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); on an axis of length 1 the
+                   voxel itself stands in for the neighbour to avoid index -1 */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+
+                val1 = (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) + Bx[index];
+                val2 = (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) + By[index];
+                val3 = (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) + Bz[index];
+
+                denom = sqrt(val1*val1 + val2*val2 + val3*val3);
+
+                /* shrunk vector length, clipped at zero */
+                val11 = (denom - denom_lam); if (val11 < 0) val11 = 0.0f;
+
+                if (denom != 0.0f) {
+                    Dx[index] = val11*(val1/denom);
+                    Dy[index] = val11*(val2/denom);
+                    Dz[index] = val11*(val3/denom);
+                }
+                else {
+                    /* zero vector has no direction; avoid 0/0 */
+                    Dx[index] = 0;
+                    Dy[index] = 0;
+                    Dz[index] = 0;
+                }
+            }
+        }
+    }
+    return 1;
+}
+/* Bregman variable update (3D): adds (forward difference of U - D) to B for
+ * all three directions. U, Dx, Dy and Dz are only read. Always returns 1.
+ */
+float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ)
+{
+    long i,j,k,i1,j1,k1,index;
+#pragma omp parallel for shared(U) private(index,i,j,k,i1,j1,k1)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+            for(k=0; k<dimZ; k++) {
+                index = (dimX*dimY)*k + j*dimX+i;
+                /* symmetric boundary conditions (Neumann); on an axis of length 1 the
+                   voxel itself stands in for the neighbour to avoid index -1 */
+                i1 = i+1; if (i1 == dimX) i1 = (i > 0) ? i-1 : i;
+                j1 = j+1; if (j1 == dimY) j1 = (j > 0) ? j-1 : j;
+                k1 = k+1; if (k1 == dimZ) k1 = (k > 0) ? k-1 : k;
+
+                Bx[index] += (U[(dimX*dimY)*k + j*dimX+i1] - U[index]) - Dx[index];
+                By[index] += (U[(dimX*dimY)*k + j1*dimX+i] - U[index]) - Dy[index];
+                Bz[index] += (U[(dimX*dimY)*k1 + j*dimX+i] - U[index]) - Dz[index];
+            }
+        }
+    }
+    return 1;
+}
diff --git a/src/Core/regularisers_CPU/SB_TV_core.h b/src/Core/regularisers_CPU/SB_TV_core.h
new file mode 100644
index 0000000..7485e3b
--- /dev/null
+++ b/src/Core/regularisers_CPU/SB_TV_core.h
@@ -0,0 +1,61 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' (methodTV = 1 selects 'l1', any other value 'iso') [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+/* C linkage so the exported symbols keep unmangled names when this header is
+   included from C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Entry point: Split Bregman TV denoising of Input into Output (2D or 3D) */
+CCPI_EXPORT float SB_TV_CPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+
+/* 2D kernels: relaxation sweep for U, anisotropic / isotropic shrinkage of
+   (Dx,Dy), and the update of the Bregman variables (Bx,By) */
+CCPI_EXPORT float gauss_seidel2D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda, float mu);
+CCPI_EXPORT float updDxDy_shrinkAniso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda);
+CCPI_EXPORT float updDxDy_shrinkIso2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY, float lambda);
+CCPI_EXPORT float updBxBy2D(float *U, float *Dx, float *Dy, float *Bx, float *By, long dimX, long dimY);
+
+/* 3D counterparts of the kernels above, with the extra z components Dz/Bz */
+CCPI_EXPORT float gauss_seidel3D(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda, float mu);
+CCPI_EXPORT float updDxDyDz_shrinkAniso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda);
+CCPI_EXPORT float updDxDyDz_shrinkIso3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ, float lambda);
+CCPI_EXPORT float updBxByBz3D(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/TGV_core.c b/src/Core/regularisers_CPU/TGV_core.c
new file mode 100644
index 0000000..136e0bd
--- /dev/null
+++ b/src/Core/regularisers_CPU/TGV_core.c
@@ -0,0 +1,532 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2019 Daniil Kazantsev
+ * Copyright 2019 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TGV_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image/volume
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ *
+ */
+
+/* Primal-dual TGV-L2 denoising driver; runs the 2D kernels when dimZ == 1 and
+ * the 3D kernels otherwise.
+ *
+ * U0      - noisy image/volume (read only)
+ * U       - receives the regularised result; initialised with a copy of U0
+ * lambda  - regularisation parameter
+ * alpha1  - bound used when projecting the first-order dual variable P
+ * alpha0  - bound used when projecting the second-order dual variable Q
+ * iter    - number of primal-dual iterations
+ * L2      - constant from which both step sizes are derived (tau = sigma = L2^-0.5)
+ * dimX, dimY, dimZ - array extents
+ *
+ * Returns U[0], or 0 for an empty array. If the work buffers cannot be
+ * allocated, a message is written to stderr and U stays a copy of U0.
+ */
+float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ)
+{
+    long DimTotal;
+    int ll;
+    float *U_old=NULL, *P1=NULL, *P2=NULL, *Q1=NULL, *Q2=NULL, *Q3=NULL;
+    float *V1=NULL, *V1_old=NULL, *V2=NULL, *V2_old=NULL;
+    float *P3=NULL, *Q4=NULL, *Q5=NULL, *Q6=NULL, *V3=NULL, *V3_old=NULL; /* 3D only */
+    float tau, sigma;
+    const long nX = (long)(dimX), nY = (long)(dimY), nZ = (long)(dimZ);
+    const int is2D = (dimZ == 1);
+    int allocated;
+
+    /* operands are widened to long first, so the product is not formed in int */
+    DimTotal = nX*nY*nZ;
+    if (DimTotal <= 0) return 0.0f; /* nothing to process and U[0] may not exist */
+
+    copyIm(U0, U, nX, nY, nZ); /* initialize */
+    tau = pow(L2,-0.5);
+    sigma = pow(L2,-0.5);
+
+    /* dual variables */
+    P1 = calloc(DimTotal, sizeof(float));
+    P2 = calloc(DimTotal, sizeof(float));
+
+    Q1 = calloc(DimTotal, sizeof(float));
+    Q2 = calloc(DimTotal, sizeof(float));
+    Q3 = calloc(DimTotal, sizeof(float));
+
+    U_old = calloc(DimTotal, sizeof(float));
+
+    V1 = calloc(DimTotal, sizeof(float));
+    V1_old = calloc(DimTotal, sizeof(float));
+    V2 = calloc(DimTotal, sizeof(float));
+    V2_old = calloc(DimTotal, sizeof(float));
+
+    allocated = (P1 != NULL && P2 != NULL && Q1 != NULL && Q2 != NULL && Q3 != NULL && U_old != NULL && V1 != NULL && V1_old != NULL && V2 != NULL && V2_old != NULL);
+
+    if (!is2D) {
+        P3 = calloc(DimTotal, sizeof(float));
+        Q4 = calloc(DimTotal, sizeof(float));
+        Q5 = calloc(DimTotal, sizeof(float));
+        Q6 = calloc(DimTotal, sizeof(float));
+        V3 = calloc(DimTotal, sizeof(float));
+        V3_old = calloc(DimTotal, sizeof(float));
+        allocated = allocated && (P3 != NULL && Q4 != NULL && Q5 != NULL && Q6 != NULL && V3 != NULL && V3_old != NULL);
+    }
+
+    if (!allocated) {
+        fprintf(stderr, "TGV: unable to allocate working memory\n");
+    }
+    else if (is2D) {
+        /*2D case*/
+
+        /* Primal-dual iterations begin here */
+        for(ll = 0; ll < iter; ll++) {
+
+            /* Calculate Dual Variable P */
+            DualP_2D(U, V1, V2, P1, P2, nX, nY, sigma);
+
+            /*Projection onto convex set for P*/
+            ProjP_2D(P1, P2, nX, nY, alpha1);
+
+            /* Calculate Dual Variable Q */
+            DualQ_2D(V1, V2, Q1, Q2, Q3, nX, nY, sigma);
+
+            /*Projection onto convex set for Q*/
+            ProjQ_2D(Q1, Q2, Q3, nX, nY, alpha0);
+
+            /*saving U into U_old*/
+            copyIm(U, U_old, nX, nY, 1l);
+
+            /*adjoint operation -> divergence and projection of P*/
+            DivProjP_2D(U, U0, P1, P2, nX, nY, lambda, tau);
+
+            /*get updated solution U*/
+            newU(U, U_old, nX, nY);
+
+            /*saving V into V_old*/
+            copyIm(V1, V1_old, nX, nY, 1l);
+            copyIm(V2, V2_old, nX, nY, 1l);
+
+            /* upd V*/
+            UpdV_2D(V1, V2, P1, P2, Q1, Q2, Q3, nX, nY, tau);
+
+            /*get new V*/
+            newU(V1, V1_old, nX, nY);
+            newU(V2, V2_old, nX, nY);
+        } /*end of iterations*/
+    }
+    else {
+        /*3D case*/
+
+        /* Primal-dual iterations begin here */
+        for(ll = 0; ll < iter; ll++) {
+
+            /* Calculate Dual Variable P */
+            DualP_3D(U, V1, V2, V3, P1, P2, P3, nX, nY, nZ, sigma);
+
+            /*Projection onto convex set for P*/
+            ProjP_3D(P1, P2, P3, nX, nY, nZ, alpha1);
+
+            /* Calculate Dual Variable Q */
+            DualQ_3D(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, nX, nY, nZ, sigma);
+
+            /*Projection onto convex set for Q*/
+            ProjQ_3D(Q1, Q2, Q3, Q4, Q5, Q6, nX, nY, nZ, alpha0);
+
+            /*saving U into U_old*/
+            copyIm(U, U_old, nX, nY, nZ);
+
+            /*adjoint operation -> divergence and projection of P*/
+            DivProjP_3D(U, U0, P1, P2, P3, nX, nY, nZ, lambda, tau);
+
+            /*get updated solution U*/
+            newU3D(U, U_old, nX, nY, nZ);
+
+            /*saving V into V_old*/
+            copyIm_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, nX, nY, nZ);
+
+            /* upd V*/
+            UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, nX, nY, nZ, tau);
+
+            /*get new V*/
+            newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, nX, nY, nZ);
+        } /*end of iterations*/
+    }
+
+    /*freeing (free(NULL) is a no-op, so the 2D case and failed allocations are safe)*/
+    free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old);
+    free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old);
+    free(V1);free(V2);free(V1_old);free(V2_old);
+    return *U;
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/*Calculating dual variable P (using forward differences)*/
+/* Ascent step for the dual variable P (2D, forward differences).
+ * Adds sigma*(forward difference of U - V) to P1 (x) and P2 (y); on the last
+ * column/row the difference is dropped and only -V contributes.
+ * Always returns 1.
+ */
+float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma)
+{
+    long col, row;
+#pragma omp parallel for shared(U,V1,V2,P1,P2) private(col,row)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            const long px = row*dimX + col;
+            /* px+1 is the right-hand neighbour, px+dimX the one in the next row */
+            P1[px] += sigma*((col == dimX-1) ? (-V1[px]) : ((U[px+1] - U[px]) - V1[px]));
+            P2[px] += sigma*((row == dimY-1) ? (-V2[px]) : ((U[px+dimX] - U[px]) - V2[px]));
+        }
+    }
+    return 1;
+}
+/*Projection onto convex set for P*/
+/* Projection of the dual variable P (2D): wherever |(P1,P2)|/alpha1 exceeds 1,
+ * both components are divided by that ratio; other pixels are left unchanged.
+ * Always returns 1.
+ */
+float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1)
+{
+    long col, row;
+#pragma omp parallel for shared(P1,P2) private(col,row)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            const long px = row*dimX + col;
+            const float ratio = (sqrtf(pow(P1[px],2) + pow(P2[px],2)))/alpha1;
+            if (ratio > 1.0f) {
+                P1[px] /= ratio;
+                P2[px] /= ratio;
+            }
+        }
+    }
+    return 1;
+}
+/*Calculating dual variable Q (using forward differences)*/
+/* Ascent step for the dual variable Q (2D, forward differences of V).
+ * Q1 accumulates d/dx V1, Q2 accumulates d/dy V2 and Q3 the average of
+ * d/dx V2 and d/dy V1, each scaled by sigma. Differences that would reach past
+ * the last column/row are taken as zero. Always returns 1.
+ */
+float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma)
+{
+    long col, row;
+#pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(col,row)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            const long px = row*dimX + col;
+            const int hasRight = (col != dimX-1);
+            const int hasBelow = (row != dimY-1);
+            /* boundary conditions (Neumann): missing neighbour -> zero difference */
+            const float dxV1 = hasRight ? (V1[px+1] - V1[px]) : 0.0f;
+            const float dxV2 = hasRight ? (V2[px+1] - V2[px]) : 0.0f;
+            const float dyV2 = hasBelow ? (V2[px+dimX] - V2[px]) : 0.0f;
+            const float dyV1 = hasBelow ? (V1[px+dimX] - V1[px]) : 0.0f;
+
+            Q1[px] += sigma*(dxV1);
+            Q2[px] += sigma*(dyV2);
+            Q3[px] += sigma*(0.5f*(dxV2 + dyV1));
+        }
+    }
+    return 1;
+}
+/* Projection of the dual variable Q (2D): the magnitude counts the Q3 term
+ * twice; wherever magnitude/alpha0 exceeds 1, all three components are divided
+ * by that ratio. Always returns 1.
+ */
+float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0)
+{
+    long col, row;
+#pragma omp parallel for shared(Q1,Q2,Q3) private(col,row)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            const long px = row*dimX + col;
+            float ratio = sqrtf(pow(Q1[px],2) + pow(Q2[px],2) + 2*pow(Q3[px],2));
+            ratio = ratio/alpha0;
+            if (ratio > 1.0f) {
+                Q1[px] /= ratio;
+                Q2[px] /= ratio;
+                Q3[px] /= ratio;
+            }
+        }
+    }
+    return 1;
+}
+/* Divergence and projection for P (backward differences)*/
+/* Primal update for U (2D): takes the backward-difference divergence of
+ * (P1,P2) and overwrites U with
+ *   (lambda*(U + tau*div) + tau*U0) / (lambda + tau).
+ * First column/row use P itself, last column/row use minus the previous P.
+ * Returns U[0].
+ */
+float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau)
+{
+    long col, row;
+#pragma omp parallel for shared(U,U0,P1,P2) private(col,row)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            const long px = row*dimX + col;
+            float divX, divY, div;
+
+            /* px-1 is the left neighbour, px-dimX the one in the previous row */
+            divX = (col == 0) ? P1[px] : ((col == dimX-1) ? (-P1[px-1]) : (P1[px] - P1[px-1]));
+            divY = (row == 0) ? P2[px] : ((row == dimY-1) ? (-P2[px-dimX]) : (P2[px] - P2[px-dimX]));
+
+            div = divX + divY;
+            U[px] = (lambda*(U[px] + tau*div) + tau*U0[px])/(lambda + tau);
+        }
+    }
+    return *U;
+}
+/*get updated solution U*/
+/* Extrapolation step: overwrites every element of U with 2*U - U_old over
+ * dimX*dimY elements. Returns U[0].
+ */
+float newU(float *U, float *U_old, long dimX, long dimY)
+{
+    long n;
+#pragma omp parallel for shared(U,U_old) private(n)
+    for(n=0; n<dimX*dimY; n++) {
+        const float current = U[n];
+        U[n] = 2*current - U_old[n];
+    }
+    return *U;
+}
+/*get update for V (backward differences)*/
+/* Primal update for V (2D, backward differences of Q):
+ *   V1 += tau*(P1 + d/dx Q1 + d/dy Q3)
+ *   V2 += tau*(P2 + d/dx Q3 + d/dy Q2)
+ * First column/row use Q itself, last column/row use minus the previous Q.
+ * Always returns 1.
+ */
+float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau)
+{
+    long col, row;
+#pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(col,row)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            const long px = row*dimX + col;
+            const long left = px - 1;     /* same row, previous column */
+            const long above = px - dimX; /* previous row, same column */
+            float dxQ1, dxQ3, dyQ2, dyQ3, divRow1, divRow2;
+
+            /* boundary conditions (Neumann) along x */
+            if (col == 0) {
+                dxQ1 = Q1[px];
+                dxQ3 = Q3[px];
+            }
+            else if (col == dimX-1) {
+                dxQ1 = -Q1[left];
+                dxQ3 = -Q3[left];
+            }
+            else {
+                dxQ1 = Q1[px] - Q1[left];
+                dxQ3 = Q3[px] - Q3[left];
+            }
+
+            /* boundary conditions (Neumann) along y */
+            if (row == 0) {
+                dyQ2 = Q2[px];
+                dyQ3 = Q3[px];
+            }
+            else if (row == dimY-1) {
+                dyQ2 = -Q2[above];
+                dyQ3 = -Q3[above];
+            }
+            else {
+                dyQ2 = Q2[px] - Q2[above];
+                dyQ3 = Q3[px] - Q3[above];
+            }
+
+            divRow1 = dxQ1 + dyQ3;
+            divRow2 = dxQ3 + dyQ2;
+            V1[px] += tau*(P1[px] + divRow1);
+            V2[px] += tau*(P2[px] + divRow2);
+        }
+    }
+    return 1;
+}
+
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/*Calculating dual variable P (using forward differences)*/
+/* Ascent step for the dual variable P (3D, forward differences).
+ * Adds sigma*(forward difference of U - V) to P1 (x), P2 (y) and P3 (z); on
+ * the last index of an axis the difference is dropped and only -V contributes.
+ * Always returns 1.
+ */
+float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma)
+{
+    long col, row, slice;
+#pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(col,row,slice)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            for(slice=0; slice<dimZ; slice++) {
+                const long vx = (dimX*dimY)*slice + row*dimX + col;
+                /* vx+1, vx+dimX and vx+dimX*dimY are the next voxels along x, y, z */
+                P1[vx] += sigma*((col == dimX-1) ? (-V1[vx]) : ((U[vx+1] - U[vx]) - V1[vx]));
+                P2[vx] += sigma*((row == dimY-1) ? (-V2[vx]) : ((U[vx+dimX] - U[vx]) - V2[vx]));
+                P3[vx] += sigma*((slice == dimZ-1) ? (-V3[vx]) : ((U[vx+(dimX*dimY)] - U[vx]) - V3[vx]));
+            }
+        }
+    }
+    return 1;
+}
+/*Projection onto convex set for P*/
+/* Projection of the dual variable P (3D): wherever |(P1,P2,P3)|/alpha1 exceeds
+ * 1, all three components are divided by that ratio. Always returns 1.
+ */
+float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1)
+{
+    long col, row, slice;
+#pragma omp parallel for shared(P1,P2,P3) private(col,row,slice)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            for(slice=0; slice<dimZ; slice++) {
+                const long vx = (dimX*dimY)*slice + row*dimX + col;
+                const float ratio = (sqrtf(pow(P1[vx],2) + pow(P2[vx],2) + pow(P3[vx],2)))/alpha1;
+                if (ratio > 1.0f) {
+                    P1[vx] /= ratio;
+                    P2[vx] /= ratio;
+                    P3[vx] /= ratio;
+                }
+            }
+        }
+    }
+    return 1;
+}
+/*Calculating dual variable Q (using forward differences)*/
+/* Ascent step for the dual variable Q (3D, forward differences of V).
+ * Q1..Q3 accumulate the diagonal terms (d/dx V1, d/dy V2, d/dz V3); Q4..Q6
+ * accumulate the averaged mixed terms. Every contribution is scaled by sigma.
+ * Differences that would reach past the last index of an axis are left at zero.
+ * Always returns 1.
+ */
+float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma)
+{
+ long i,j,k,index;
+ float q1, q2, q3, q11, q22, q33, q44, q55, q66;
+#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66)
+ for(i=0; i<dimX; i++) {
+ for(j=0; j<dimY; j++) {
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ /* all differences default to zero; only those with a valid forward neighbour are filled in */
+ q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
+ /* symmetric boundary conditions (Neumann) */
+ /* x-direction differences of V1, V2, V3 */
+ if (i != dimX-1){
+ q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index];
+ q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index];
+ q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index];
+ }
+ /* y-direction differences of V2, V1, V3 */
+ if (j != dimY-1) {
+ q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index];
+ q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index];
+ q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index];
+ }
+ /* z-direction differences of V3, V1, V2 */
+ if (k != dimZ-1) {
+ q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index];
+ q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index];
+ q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index];
+ }
+
+ Q1[index] += sigma*(q1); /*Q11*/
+ Q2[index] += sigma*(q2); /*Q22*/
+ Q3[index] += sigma*(q3); /*Q33*/
+ Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
+ Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
+ Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
+ }}}
+ return 1;
+}
+/* Projection of the dual variable Q (3D): the magnitude counts the Q4, Q5 and
+ * Q6 terms twice; wherever magnitude/alpha0 exceeds 1, all six components are
+ * divided by that ratio. Always returns 1.
+ */
+float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0)
+{
+    long col, row, slice;
+#pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(col,row,slice)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            for(slice=0; slice<dimZ; slice++) {
+                const long vx = (dimX*dimY)*slice + row*dimX + col;
+                float ratio = sqrtf(pow(Q1[vx],2) + pow(Q2[vx],2) + pow(Q3[vx],2) + 2.0f*pow(Q4[vx],2) + 2.0f*pow(Q5[vx],2) + 2.0f*pow(Q6[vx],2));
+                ratio = ratio/alpha0;
+                if (ratio > 1.0f) {
+                    Q1[vx] /= ratio;
+                    Q2[vx] /= ratio;
+                    Q3[vx] /= ratio;
+                    Q4[vx] /= ratio;
+                    Q5[vx] /= ratio;
+                    Q6[vx] /= ratio;
+                }
+            }
+        }
+    }
+    return 1;
+}
+/* Divergence and projection for P*/
+/* Primal update for U (3D): takes the backward-difference divergence of
+ * (P1,P2,P3) and overwrites U with
+ *   (lambda*(U + tau*div) + tau*U0) / (lambda + tau).
+ * The first index of an axis uses P itself, the last index minus the previous P.
+ * Returns U[0].
+ */
+float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau)
+{
+    long col, row, slice;
+#pragma omp parallel for shared(U,U0,P1,P2,P3) private(col,row,slice)
+    for(col=0; col<dimX; col++) {
+        for(row=0; row<dimY; row++) {
+            for(slice=0; slice<dimZ; slice++) {
+                const long vx = (dimX*dimY)*slice + row*dimX + col;
+                const long prevX = vx - 1;
+                const long prevY = vx - dimX;
+                const long prevZ = vx - (dimX*dimY);
+                float divX, divY, divZ, div;
+
+                divX = (col == 0) ? P1[vx] : ((col == dimX-1) ? (-P1[prevX]) : (P1[vx] - P1[prevX]));
+                divY = (row == 0) ? P2[vx] : ((row == dimY-1) ? (-P2[prevY]) : (P2[vx] - P2[prevY]));
+                divZ = (slice == 0) ? P3[vx] : ((slice == dimZ-1) ? (-P3[prevZ]) : (P3[vx] - P3[prevZ]));
+
+                div = divX + divY + divZ;
+                U[vx] = (lambda*(U[vx] + tau*div) + tau*U0[vx])/(lambda + tau);
+            }
+        }
+    }
+    return *U;
+}
+/*get update for V*/
+/* Primal update for V (3D, backward differences of Q):
+ *   V1 += tau*(P1 + d/dx Q1 + d/dy Q4 + d/dz Q5)
+ *   V2 += tau*(P2 + d/dx Q4 + d/dy Q2 + d/dz Q6)
+ *   V3 += tau*(P3 + d/dx Q5 + d/dy Q6 + d/dz Q3)
+ * On the first index of an axis the difference is Q itself, on the last index
+ * it is minus the previous Q. Always returns 1.
+ */
+float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau)
+{
+ long i,j,k,index;
+ float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
+#pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3)
+ for(i=0; i<dimX; i++) {
+ for(j=0; j<dimY; j++) {
+ for(k=0; k<dimZ; k++) {
+ index = (dimX*dimY)*k + j*dimX+i;
+ q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f;
+ /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
+ /* symmetric boundary conditions (Neumann) */
+
+ /* backward differences along x of Q1, Q4, Q5 */
+ if (i == 0) {
+ q1 = Q1[index];
+ q4x = Q4[index];
+ q5x = Q5[index]; }
+ else if (i == dimX-1) {
+ q1 = -Q1[(dimX*dimY)*k + j*dimX+(i-1)];
+ q4x = -Q4[(dimX*dimY)*k + j*dimX+(i-1)];
+ q5x = -Q5[(dimX*dimY)*k + j*dimX+(i-1)]; }
+ else {
+ q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)];
+ q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)];
+ q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; }
+ /* backward differences along y of Q2, Q4, Q6 */
+ if (j == 0) {
+ q2 = Q2[index];
+ q4y = Q4[index];
+ q6y = Q6[index]; }
+ else if (j == dimY-1) {
+ q2 = -Q2[(dimX*dimY)*k + (j-1)*dimX+i];
+ q4y = -Q4[(dimX*dimY)*k + (j-1)*dimX+i];
+ q6y = -Q6[(dimX*dimY)*k + (j-1)*dimX+i]; }
+ else {
+ q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i];
+ q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i];
+ q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; }
+ /* backward differences along z of Q6, Q5, Q3 */
+ if (k == 0) {
+ q6z = Q6[index];
+ q5z = Q5[index];
+ q3 = Q3[index]; }
+ else if (k == dimZ-1) {
+ q6z = -Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q5z = -Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q3 = -Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; }
+ else {
+ q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i];
+ q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; }
+
+ /* one divergence per component of V, each summing one difference per axis */
+ div1 = q1 + q4y + q5z;
+ div2 = q4x + q2 + q6z;
+ div3 = q5x + q6y + q3;
+
+ V1[index] += tau*(P1[index] + div1);
+ V2[index] += tau*(P2[index] + div2);
+ V3[index] += tau*(P3[index] + div3);
+ }}}
+ return 1;
+}
+
+/* Copies three arrays of dimX*dimY*dimZ floats in one pass:
+ * V1 -> V1_old, V2 -> V2_old, V3 -> V3_old. Always returns 1.
+ */
+float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
+{
+    long n;
+#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(n)
+    for (n = 0; n < dimX*dimY*dimZ; n++) {
+        V1_old[n] = V1[n];
+        V2_old[n] = V2[n];
+        V3_old[n] = V3[n];
+    }
+    return 1;
+}
+
+/*get updated solution U*/
+/* Extrapolation step (3D): overwrites every element of U with 2*U - U_old
+ * over dimX*dimY*dimZ elements. Returns U[0].
+ */
+float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ)
+{
+    long n;
+#pragma omp parallel for shared(U, U_old) private(n)
+    for(n=0; n<dimX*dimY*dimZ; n++) {
+        const float current = U[n];
+        U[n] = 2.0f*current - U_old[n];
+    }
+    return *U;
+}
+
+
+/*get updated solution U*/
+/* Extrapolation step applied to three arrays at once: each of V1, V2, V3 is
+ * overwritten with 2*V - V_old over dimX*dimY*dimZ elements. Always returns 1.
+ */
+float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ)
+{
+    long n;
+#pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(n)
+    for(n=0; n<dimX*dimY*dimZ; n++) {
+        const float a = V1[n], b = V2[n], c = V3[n];
+        V1[n] = 2.0f*a - V1_old[n];
+        V2[n] = 2.0f*b - V2_old[n];
+        V3[n] = 2.0f*c - V3_old[n];
+    }
+    return 1;
+}
+
diff --git a/src/Core/regularisers_CPU/TGV_core.h b/src/Core/regularisers_CPU/TGV_core.h
new file mode 100644
index 0000000..11b12c1
--- /dev/null
+++ b/src/Core/regularisers_CPU/TGV_core.h
@@ -0,0 +1,73 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image/volume
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CCPI_EXPORT float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ);
+
+/* 2D functions */
+CCPI_EXPORT float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, long dimY, float sigma);
+CCPI_EXPORT float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1);
+CCPI_EXPORT float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float sigma);
+CCPI_EXPORT float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alpha0);
+CCPI_EXPORT float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dimY, float lambda, float tau);
+CCPI_EXPORT float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, long dimX, long dimY, float tau);
+CCPI_EXPORT float newU(float *U, float *U_old, long dimX, long dimY);
+/* 3D functions */
+CCPI_EXPORT float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float sigma);
+CCPI_EXPORT float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float alpha1);
+CCPI_EXPORT float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float sigma);
+CCPI_EXPORT float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0);
+CCPI_EXPORT float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, float lambda, float tau);
+CCPI_EXPORT float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float tau);
+CCPI_EXPORT float newU3D(float *U, float *U_old, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_CPU/TNV_core.c b/src/Core/regularisers_CPU/TNV_core.c
new file mode 100755
index 0000000..753cc5f
--- /dev/null
+++ b/src/Core/regularisers_CPU/TNV_core.c
@@ -0,0 +1,452 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TNV_core.h"
+
+/*
+ * C-OMP implementation of Total Nuclear Variation regularisation model (2D + channels) [1]
+ * The code is modified from the implementation by Joan Duran <joan.duran@uib.es>; see
+ * "denoisingPDHG_ipol.cpp" in Joan Duran's Collaborative Total Variation package
+ *
+ * Input Parameters:
+ * 1. Noisy volume of 2D + channel dimension, i.e. 3D volume
+ * 2. lambda - regularisation parameter
+ * 3. Number of iterations [OPTIONAL parameter]
+ * 4. epsilon - tolerance constant [OPTIONAL parameter]
+ * 5. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+ *
+ * Output:
+ * 1. Filtered/regularized image
+ *
+ * [1]. Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.
+ */
+
+float TNV_CPU_main(float *Input, float *u, float lambda, int maxIter, float tol, int dimX, int dimY, int dimZ)
+{
+    /* Total Nuclear Variation denoising of a 2D + channels volume using a
+     * primal-dual hybrid gradient (PDHG) iteration with adaptive step sizes.
+     *
+     * Input   - noisy volume, dimX*dimY*dimZ floats
+     * u       - in/out: read as the starting estimate, overwritten with the result
+     * lambda  - regularisation parameter (used internally as 1/(2*lambda))
+     * maxIter - maximum number of iterations
+     * tol     - the loop stops early once the mean residual falls below tol
+     *
+     * Returns the first element of u. Returns 0 without touching u when any
+     * dimension is non-positive; returns the untouched u[0] (after reporting on
+     * stderr) when the work buffers cannot be allocated.
+     */
+
+    /* Extents widened once so that no element count is ever evaluated in int. */
+    const long nX = (long)(dimX);
+    const long nY = (long)(dimY);
+    const long nZ = (long)(dimZ);
+
+    long k, p, q, r, DimTotal;
+    float taulambda;
+    float *u_upd, *gx, *gy, *gx_upd, *gy_upd, *qx, *qy, *qx_upd, *qy_upd, *v, *vx, *vy, *gradx, *grady, *gradx_upd, *grady_upd, *gradx_ubar, *grady_ubar, *div, *div_upd;
+
+    /* PDHG algorithm parameters */
+    float tau = 0.5f;
+    float sigma = 0.5f;
+    float theta = 1.0f;
+
+    /* Backtracking parameters */
+    float s = 1.0f;
+    float gamma = 0.75f;
+    float beta = 0.95f;
+    float alpha0 = 0.2f;
+    float alpha = alpha0;
+    float delta = 1.5f;
+    float eta = 0.95f;
+
+    float divtau = 1.0f / tau;
+    float divsigma = 1.0f / sigma;
+    float theta1 = 1.0f + theta;
+
+    int iter = 0;
+    float residual = fLarge;
+    float ubarx, ubary;
+
+    if (dimX <= 0 || dimY <= 0 || dimZ <= 0) return 0.0f;
+
+    /* norm selectors forwarded to proxF */
+    p = 1l;
+    q = 1l;
+    r = 0l;
+
+    lambda = 1.0f/(2.0f*lambda);
+    DimTotal = nX*nY*nZ;
+    taulambda = tau * lambda;
+
+    /* Auxiliary vectors (zero-initialised) */
+    u_upd = calloc(DimTotal, sizeof(float));
+    gx = calloc(DimTotal, sizeof(float));
+    gy = calloc(DimTotal, sizeof(float));
+    gx_upd = calloc(DimTotal, sizeof(float));
+    gy_upd = calloc(DimTotal, sizeof(float));
+    qx = calloc(DimTotal, sizeof(float));
+    qy = calloc(DimTotal, sizeof(float));
+    qx_upd = calloc(DimTotal, sizeof(float));
+    qy_upd = calloc(DimTotal, sizeof(float));
+    v = calloc(DimTotal, sizeof(float));
+    vx = calloc(DimTotal, sizeof(float));
+    vy = calloc(DimTotal, sizeof(float));
+    gradx = calloc(DimTotal, sizeof(float));
+    grady = calloc(DimTotal, sizeof(float));
+    gradx_upd = calloc(DimTotal, sizeof(float));
+    grady_upd = calloc(DimTotal, sizeof(float));
+    gradx_ubar = calloc(DimTotal, sizeof(float));
+    grady_ubar = calloc(DimTotal, sizeof(float));
+    div = calloc(DimTotal, sizeof(float));
+    div_upd = calloc(DimTotal, sizeof(float));
+
+    /* Twenty full-size buffers are needed; bail out cleanly instead of
+     * dereferencing NULL if any of them could not be obtained. */
+    if (!u_upd || !gx || !gy || !gx_upd || !gy_upd || !qx || !qy || !qx_upd || !qy_upd ||
+        !v || !vx || !vy || !gradx || !grady || !gradx_upd || !grady_upd ||
+        !gradx_ubar || !grady_ubar || !div || !div_upd) {
+        fprintf(stderr, "TNV_CPU_main: failed to allocate the auxiliary buffers\n");
+        goto cleanup;
+    }
+
+    /* Apply Primal-Dual Hybrid Gradient scheme */
+    for(iter = 0; iter < maxIter; iter++) {
+        /* Argument of proximal mapping of fidelity term */
+#pragma omp parallel for shared(v, u) private(k)
+        for(k=0; k<DimTotal; k++) {v[k] = u[k] + tau*div[k];}
+
+        /* Proximal solution of fidelity term */
+        proxG(u_upd, v, Input, taulambda, nX, nY, nZ);
+
+        /* Gradient of updated primal variable */
+        gradient(u_upd, gradx_upd, grady_upd, nX, nY, nZ);
+
+        /* Argument of proximal mapping of regularization term */
+#pragma omp parallel for shared(gradx_upd, grady_upd, gradx, grady) private(k, ubarx, ubary)
+        for(k=0; k<DimTotal; k++) {
+            ubarx = theta1 * gradx_upd[k] - theta * gradx[k];
+            ubary = theta1 * grady_upd[k] - theta * grady[k];
+            vx[k] = ubarx + divsigma * qx[k];
+            vy[k] = ubary + divsigma * qy[k];
+            gradx_ubar[k] = ubarx;
+            grady_ubar[k] = ubary;
+        }
+
+        proxF(gx_upd, gy_upd, vx, vy, sigma, p, q, r, nX, nY, nZ);
+
+        /* Update dual variable */
+#pragma omp parallel for shared(qx_upd, qy_upd) private(k)
+        for(k=0; k<DimTotal; k++) {
+            qx_upd[k] = qx[k] + sigma * (gradx_ubar[k] - gx_upd[k]);
+            qy_upd[k] = qy[k] + sigma * (grady_ubar[k] - gy_upd[k]);
+        }
+
+        /* Divergence of updated dual variable; divergence() accumulates into
+         * its output, so the buffer is cleared first */
+#pragma omp parallel for shared(div_upd) private(k)
+        for(k=0; k<DimTotal; k++) {div_upd[k] = 0.0f;}
+        divergence(qx_upd, qy_upd, div_upd, nX, nY, nZ);
+
+        /* Compute primal residual, dual residual, and backtracking condition.
+         * The accumulators are plain floats, so this loop is left serial. */
+        float resprimal = 0.0f;
+        float resdual = 0.0f;
+        float product = 0.0f;
+        float unorm = 0.0f;
+        float qnorm = 0.0f;
+
+        for(k=0; k<DimTotal; k++) {
+            float udiff = u[k] - u_upd[k];
+            float qxdiff = qx[k] - qx_upd[k];
+            float qydiff = qy[k] - qy_upd[k];
+            float divdiff = div[k] - div_upd[k];
+            float gradxdiff = gradx[k] - gradx_upd[k];
+            float gradydiff = grady[k] - grady_upd[k];
+
+            resprimal += fabs(divtau*udiff + divdiff);
+            resdual += fabs(divsigma*qxdiff - gradxdiff);
+            resdual += fabs(divsigma*qydiff - gradydiff);
+
+            unorm += (udiff * udiff);
+            qnorm += (qxdiff * qxdiff + qydiff * qydiff);
+            product += (gradxdiff * qxdiff + gradydiff * qydiff);
+        }
+
+        float b = (2.0f * tau * sigma * product) / (gamma * sigma * unorm +
+                                                    gamma * tau * qnorm);
+
+        /* Adapt step-size parameters */
+        float dual_dot_delta = resdual * s * delta;
+        float dual_div_delta = (resdual * s) / delta;
+
+        if(b > 1)
+        {
+            /* Decrease step-sizes to fit balancing principle */
+            tau = (beta * tau) / b;
+            sigma = (beta * sigma) / b;
+            alpha = alpha0;
+
+            /* copyIm(A, U) copies A into U: the *_upd buffers are overwritten
+             * with the previous iterate, i.e. this step is discarded */
+            copyIm(u, u_upd, nX, nY, nZ);
+            copyIm(gx, gx_upd, nX, nY, nZ);
+            copyIm(gy, gy_upd, nX, nY, nZ);
+            copyIm(qx, qx_upd, nX, nY, nZ);
+            copyIm(qy, qy_upd, nX, nY, nZ);
+            copyIm(gradx, gradx_upd, nX, nY, nZ);
+            copyIm(grady, grady_upd, nX, nY, nZ);
+            copyIm(div, div_upd, nX, nY, nZ);
+
+        } else if(resprimal > dual_dot_delta)
+        {
+            /* Increase primal step-size and decrease dual step-size */
+            tau = tau / (1.0f - alpha);
+            sigma = sigma * (1.0f - alpha);
+            alpha = alpha * eta;
+
+        } else if(resprimal < dual_div_delta)
+        {
+            /* Decrease primal step-size and increase dual step-size */
+            tau = tau * (1.0f - alpha);
+            sigma = sigma / (1.0f - alpha);
+            alpha = alpha * eta;
+        }
+
+        /* Update variables */
+        taulambda = tau * lambda;
+        divsigma = 1.0f / sigma;
+        divtau = 1.0f / tau;
+
+        /* the *_upd buffers become the current iterate */
+        copyIm(u_upd, u, nX, nY, nZ);
+        copyIm(gx_upd, gx, nX, nY, nZ);
+        copyIm(gy_upd, gy, nX, nY, nZ);
+        copyIm(qx_upd, qx, nX, nY, nZ);
+        copyIm(qy_upd, qy, nX, nY, nZ);
+        copyIm(gradx_upd, gradx, nX, nY, nZ);
+        copyIm(grady_upd, grady, nX, nY, nZ);
+        copyIm(div_upd, div, nX, nY, nZ);
+
+        /* Compute residual at current iteration */
+        residual = (resprimal + resdual) / ((float) (DimTotal));
+
+        /* the stopping message is printed once, after the loop */
+        if (residual < tol) break;
+    }
+    printf("Iterations stopped at %i with the residual %f \n", iter, residual);
+
+cleanup:
+    /* free(NULL) is a no-op, so this is safe on the allocation-failure path */
+    free(u_upd); free(gx); free(gy); free(gx_upd); free(gy_upd);
+    free(qx); free(qy); free(qx_upd); free(qy_upd); free(v); free(vx); free(vy);
+    free(gradx); free(grady); free(gradx_upd); free(grady_upd); free(gradx_ubar);
+    free(grady_ubar); free(div); free(div_upd);
+    return *u;
+}
+
+/* Proximal step of the fidelity term:
+ *   u_upd[n] = (v[n] + taulambda*f[n]) / (1 + taulambda)
+ * for all dimX*dimY*dimZ elements. Returns the first element of u_upd. */
+float proxG(float *u_upd, float *v, float *f, float taulambda, long dimX, long dimY, long dimZ)
+{
+    const float denom = 1.0f + taulambda;
+    const long total = dimZ*dimX*dimY;
+    long n;
+#pragma omp parallel for shared(v, f, u_upd) private(n)
+    for (n = 0; n < total; n++) {
+        u_upd[n] = (v[n] + taulambda * f[n])/denom;
+    }
+    return u_upd[0];
+}
+
+/* Forward-difference gradient of every dimX x dimY slice of u_upd.
+ * gradx_upd gets u[i+1] - u[i] along x (0 in the last column),
+ * grady_upd gets u[j+1] - u[j] along y (0 in the last row).
+ * Slices along dimZ are processed independently. Always returns 1. */
+float gradient(float *u_upd, float *gradx_upd, float *grady_upd, long dimX, long dimY, long dimZ)
+{
+    long col, row, slice;
+#pragma omp parallel for shared(gradx_upd,grady_upd,u_upd) private(col, row, slice)
+    for (slice = 0; slice < dimZ; slice++) {
+        for (row = 0; row < dimY; row++) {
+            for (col = 0; col < dimX; col++) {
+                const long here = (dimX*dimY)*slice + col + row*dimX;
+
+                /* x-direction: right neighbour is one element further */
+                gradx_upd[here] = (col != dimX-1) ? (u_upd[here + 1] - u_upd[here]) : 0.0f;
+
+                /* y-direction: neighbour in the next row is dimX elements further */
+                grady_upd[here] = (row != dimY-1) ? (u_upd[here + dimX] - u_upd[here]) : 0.0f;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Proximal step of the regularisation term, solved independently per pixel.
+ *
+ * For each (i, j) the 2x2 matrix M = [M1 M2; M2 M3] is accumulated from the
+ * products of vx and vy over the dimZ axis. Its eigenvalues give sig1, sig2,
+ * which are shrunk according to p (p == 1: soft threshold by 1/sigma;
+ * p == INFNORM: via an l1 projection) and the result is applied to
+ * (vx, vy) to produce (gx, gy).
+ *
+ * q and r are accepted for interface compatibility but are not used here.
+ * Always returns 1.
+ */
+float proxF(float *gx, float *gy, float *vx, float *vy, float sigma, int p, int q, int r, long dimX, long dimY, long dimZ)
+{
+    float divsigma = 1.0f / sigma;
+
+    float sum, shrinkfactor;
+    float M1,M2,M3,valuex,valuey,T,D,det,eig1,eig2,sig1,sig2,V1, V2, V3, V4, v0,v1,v2, mu1,mu2,sig1_upd,sig2_upd,t1,t2,t3;
+    long i,j,k, ii, num;
+#pragma omp parallel for shared (gx,gy,vx,vy,p) private(i,ii,j,k,num, sum, shrinkfactor, M1,M2,M3,valuex,valuey,T,D,det,eig1,eig2,sig1,sig2,V1, V2, V3, V4,v0,v1,v2,mu1,mu2,sig1_upd,sig2_upd,t1,t2,t3)
+    for(i=0; i<dimX; i++) {
+        for(j=0; j<dimY; j++) {
+
+            /* Two-element scratch for the l1 projection. Declared inside the
+             * loop body so each thread has its own copy; this replaces an
+             * unchecked calloc/free pair per pixel. */
+            float proj[2] = {0.0f, 0.0f};
+
+            /* Compute matrix $M\in\R^{2\times 2}$ */
+            M1 = 0.0f;
+            M2 = 0.0f;
+            M3 = 0.0f;
+
+            for(k = 0; k < dimZ; k++)
+            {
+                valuex = vx[(dimX*dimY)*k + (j)*dimX + (i)];
+                valuey = vy[(dimX*dimY)*k + (j)*dimX + (i)];
+
+                M1 += (valuex * valuex);
+                M2 += (valuex * valuey);
+                M3 += (valuey * valuey);
+            }
+
+            /* Compute eigenvalues of M (clamped at 0 against round-off) */
+            T = M1 + M3;
+            D = M1 * M3 - M2 * M2;
+            det = sqrt(MAX((T * T / 4.0f) - D, 0.0f));
+            eig1 = MAX((T / 2.0f) + det, 0.0f);
+            eig2 = MAX((T / 2.0f) - det, 0.0f);
+            sig1 = sqrt(eig1);
+            sig2 = sqrt(eig2);
+
+            /* Compute normalized eigenvectors */
+            V1 = V2 = V3 = V4 = 0.0f;
+
+            if(M2 != 0.0f)
+            {
+                v0 = M2;
+                v1 = eig1 - M3;
+                v2 = eig2 - M3;
+
+                mu1 = sqrtf(v0 * v0 + v1 * v1);
+                mu2 = sqrtf(v0 * v0 + v2 * v2);
+
+                if(mu1 > fTiny)
+                {
+                    V1 = v1 / mu1;
+                    V3 = v0 / mu1;
+                }
+
+                if(mu2 > fTiny)
+                {
+                    V2 = v2 / mu2;
+                    V4 = v0 / mu2;
+                }
+
+            } else
+            {
+                /* M is already diagonal */
+                if(M1 > M3)
+                {
+                    V1 = V4 = 1.0f;
+                    V2 = V3 = 0.0f;
+
+                } else
+                {
+                    V1 = V4 = 0.0f;
+                    V2 = V3 = 1.0f;
+                }
+            }
+
+            /* Compute prox_p of the diagonal entries */
+            sig1_upd = sig2_upd = 0.0f;
+
+            if(p == 1)
+            {
+                sig1_upd = MAX(sig1 - divsigma, 0.0f);
+                sig2_upd = MAX(sig2 - divsigma, 0.0f);
+
+            } else if(p == INFNORM)
+            {
+                proj[0] = sigma * fabs(sig1);
+                proj[1] = sigma * fabs(sig2);
+
+                /* l1 projection part: shrink both entries until their sum is <= 1 */
+                sum = fLarge;
+                num = 0l;
+                shrinkfactor = 0.0f;
+                while(sum > 1.0f)
+                {
+                    sum = 0.0f;
+                    num = 0;
+
+                    for(ii = 0; ii < 2; ii++)
+                    {
+                        proj[ii] = MAX(proj[ii] - shrinkfactor, 0.0f);
+
+                        sum += fabs(proj[ii]);
+                        if(proj[ii]!= 0.0f)
+                            num++;
+                    }
+
+                    if(num > 0)
+                        shrinkfactor = (sum - 1.0f) / num;
+                    else
+                        break;
+                }
+                /* l1 proj ends */
+
+                sig1_upd = sig1 - divsigma * proj[0];
+                sig2_upd = sig2 - divsigma * proj[1];
+            }
+
+            /* Compute the diagonal entries of $\widehat{\Sigma}\Sigma^{\dagger}_0$ */
+            if(sig1 > fTiny)
+                sig1_upd /= sig1;
+
+            if(sig2 > fTiny)
+                sig2_upd /= sig2;
+
+            /* Compute solution */
+            t1 = sig1_upd * V1 * V1 + sig2_upd * V2 * V2;
+            t2 = sig1_upd * V1 * V3 + sig2_upd * V2 * V4;
+            t3 = sig1_upd * V3 * V3 + sig2_upd * V4 * V4;
+
+            for(k = 0; k < dimZ; k++)
+            {
+                gx[(dimX*dimY)*k + j*dimX + i] = vx[(dimX*dimY)*k + j*dimX + i] * t1 + vy[(dimX*dimY)*k + j*dimX + i] * t2;
+                gy[(dimX*dimY)*k + j*dimX + i] = vx[(dimX*dimY)*k + j*dimX + i] * t2 + vy[(dimX*dimY)*k + j*dimX + i] * t3;
+            }
+        }}
+
+    return 1;
+}
+
+/* Accumulate the contribution of (qx_upd, qy_upd) into div_upd, slice by slice.
+ * For every element that has a right neighbour, qx is added at the element and
+ * subtracted at the neighbour; likewise for qy and the neighbour in the next
+ * row. div_upd is only added to, never cleared, so the caller must initialise
+ * it. Returns the first element of div_upd. */
+float divergence(float *qx_upd, float *qy_upd, float *div_upd, long dimX, long dimY, long dimZ)
+{
+    long col, row, slice;
+#pragma omp parallel for shared(qx_upd,qy_upd,div_upd) private(col, row, slice)
+    for (slice = 0; slice < dimZ; slice++) {
+        for (row = 0; row < dimY; row++) {
+            for (col = 0; col < dimX; col++) {
+                const long here = (dimX*dimY)*slice + col + row*dimX;
+
+                if (col != dimX-1) {
+                    /* x-direction pair: (here, here + 1) */
+                    div_upd[here + 1] -= qx_upd[here];
+                    div_upd[here] += qx_upd[here];
+                }
+
+                if (row != dimY-1) {
+                    /* y-direction pair: (here, here + dimX) */
+                    div_upd[here + dimX] -= qy_upd[here];
+                    div_upd[here] += qy_upd[here];
+                }
+            }
+        }
+    }
+    return div_upd[0];
+}
diff --git a/src/Core/regularisers_CPU/TNV_core.h b/src/Core/regularisers_CPU/TNV_core.h
new file mode 100644
index 0000000..aa050a4
--- /dev/null
+++ b/src/Core/regularisers_CPU/TNV_core.h
@@ -0,0 +1,47 @@
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <stdio.h>
+#include "omp.h"
+#include "utils.h"
+#include "CCPiDefines.h"
+
+/* Threshold below which a norm / singular value is treated as zero in proxF */
+#define fTiny 0.00000001f
+/* Large starting value for the residual and for the l1-projection sum */
+#define fLarge 100000000.0f
+/* Value of the norm selector p that picks the INFNORM branch in proxF */
+#define INFNORM -1
+
+/* Function-like macros: each argument may be evaluated twice, so do not pass
+ * expressions with side effects */
+#define MAX(i,j) ((i)<(j) ? (j):(i))
+#define MIN(i,j) ((i)<(j) ? (i):(j))
+
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float TNV_CPU_main(float *Input, float *u, float lambda, int maxIter, float tol, int dimX, int dimY, int dimZ);
+
+/*float PDHG(float *A, float *B, float tau, float sigma, float theta, float lambda, int p, int q, int r, float tol, int maxIter, int d_c, int d_w, int d_h);*/
+CCPI_EXPORT float proxG(float *u_upd, float *v, float *f, float taulambda, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float gradient(float *u_upd, float *gradx_upd, float *grady_upd, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float proxF(float *gx, float *gy, float *vx, float *vy, float sigma, int p, int q, int r, long dimX, long dimY, long dimZ);
+CCPI_EXPORT float divergence(float *qx_upd, float *qy_upd, float *div_upd, long dimX, long dimY, long dimZ);
+#ifdef __cplusplus
+}
+#endif \ No newline at end of file
diff --git a/src/Core/regularisers_CPU/utils.c b/src/Core/regularisers_CPU/utils.c
new file mode 100644
index 0000000..7a4e80b
--- /dev/null
+++ b/src/Core/regularisers_CPU/utils.c
@@ -0,0 +1,117 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "utils.h"
+#include <math.h>
+
+/* Copy the float image/volume A (dimX*dimY*dimZ elements) into U.
+ * Note the argument order: source first, destination second.
+ * Returns the first element of U. */
+float copyIm(float *A, float *U, long dimX, long dimY, long dimZ)
+{
+    const long total = dimX*dimY*dimZ;
+    long idx;
+#pragma omp parallel for shared(A, U) private(idx)
+    for (idx = 0; idx < total; idx++) {
+        U[idx] = A[idx];
+    }
+    return U[0];
+}
+
+/* Copy the unsigned char image/volume A (dimX*dimY*dimZ elements) into U.
+ * Note the argument order: source first, destination second.
+ * The element count and loop index are held in long, as in copyIm, so that the
+ * product of the three int extents is not evaluated in int.
+ * Returns the first element of U. */
+unsigned char copyIm_unchar(unsigned char *A, unsigned char *U, int dimX, int dimY, int dimZ)
+{
+    const long total = (long)(dimX)*(long)(dimY)*(long)(dimZ);
+    long j;
+#pragma omp parallel for shared(A, U) private(j)
+    for (j = 0; j < total; j++) U[j] = A[j];
+    return *U;
+}
+
+/* Copy the 2D image A into U with its rows cyclically shifted by roll_value.
+ * switcher == 0: row j of U is taken from row j + roll_value of A (wrapping);
+ * otherwise:     row j of U is taken from row j - roll_value of A (wrapping).
+ * Returns the first element of U. */
+float copyIm_roll(float *A, float *U, int dimX, int dimY, int roll_value, int switcher)
+{
+    int i, j;
+#pragma omp parallel for shared(U, A) private(i,j)
+    for (i=0; i<dimX; i++) {
+        for (j=0; j<dimY; j++) {
+            int srcRow;
+            if (switcher == 0) {
+                srcRow = (j < (dimY - roll_value)) ? (j+roll_value) : (j - (dimY - roll_value));
+            }
+            else {
+                srcRow = (j < roll_value) ? (j+(dimY - roll_value)) : (j - roll_value);
+            }
+            U[j*dimX + i] = A[srcRow*dimX + i];
+        }
+    }
+    return U[0];
+}
+
+/* Total-variation energy of a 2D image.
+ *   type == 1: E_val[0] = 2*lambda*sum|grad U| + sum (U - U0)^2
+ *   type == 2: E_val[0] = 2*lambda*sum|grad U|
+ * For any other type E_val[0] is left untouched.
+ * |grad U| uses forward differences; at the last row/column the neighbour is
+ * the pixel itself, so that difference is zero.
+ * Returns E_val[0].
+ */
+float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY)
+{
+    int col, row;
+    float gradEnergy = 0.0f, dataEnergy = 0.0f;
+
+    for (row = 0; row < dimY; row++) {
+        for (col = 0; col < dimX; col++) {
+            const int here = row*dimX + col;
+            /* forward neighbours, clamped at the image border */
+            const int colNext = (col == dimX-1) ? col : col + 1;
+            const int rowNext = (row == dimY-1) ? row : row + 1;
+
+            /* squared forward differences to the next row and the next column */
+            const float dRow_sq = powf((float)(U[rowNext*dimX + col] - U[here]),2);
+            const float dCol_sq = powf((float)(U[row*dimX + colNext] - U[here]),2);
+
+            gradEnergy += 2.0f*lambda*sqrtf((float)(dRow_sq) + (float)(dCol_sq)); /* gradient term */
+            dataEnergy += powf((float)(U[here]-U0[here]),2); /* fidelity term */
+        }
+    }
+
+    switch (type) {
+        case 1: E_val[0] = gradEnergy + dataEnergy; break;
+        case 2: E_val[0] = gradEnergy; break;
+    }
+    return E_val[0];
+}
+
+/* Total-variation energy of a 3D volume.
+ *   type == 1: E_val[0] = 2*lambda*sum|grad U| + sum (U - U0)^2
+ *   type == 2: E_val[0] = 2*lambda*sum|grad U|
+ * For any other type E_val[0] is left untouched.
+ * |grad U| uses forward differences along all three axes; at the last index of
+ * an axis the neighbour is the voxel itself, so that difference is zero.
+ * Returns E_val[0].
+ */
+float TV_energy3D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY, int dimZ)
+{
+    long col, row, slab;
+    float gradEnergy = 0.0f, dataEnergy = 0.0f;
+
+    /* the row / column / slab nesting is kept as is so that the float sums
+     * accumulate in the same order */
+    for (row = 0; row < (long)(dimY); row++) {
+        for (col = 0; col < (long)(dimX); col++) {
+            for (slab = 0; slab < (long)(dimZ); slab++) {
+                const long here = (dimX*dimY)*slab + row*dimX+col;
+                /* forward neighbours, clamped at the volume border */
+                const long colNext = (col == (long)(dimX-1)) ? col : col + 1;
+                const long rowNext = (row == (long)(dimY-1)) ? row : row + 1;
+                const long slabNext = (slab == (long)(dimZ-1)) ? slab : slab + 1;
+
+                /* squared forward differences to the next row, column and slab */
+                const float dRow_sq = powf((float)(U[(dimX*dimY)*slab + rowNext*dimX+col] - U[here]),2);
+                const float dCol_sq = powf((float)(U[(dimX*dimY)*slab + row*dimX+colNext] - U[here]),2);
+                const float dSlab_sq = powf((float)(U[(dimX*dimY)*slabNext + row*dimX+col] - U[here]),2);
+
+                gradEnergy += 2.0f*lambda*sqrtf((float)(dRow_sq) + (float)(dCol_sq) + (float)(dSlab_sq)); /* gradient term */
+                dataEnergy += (powf((float)(U[here]-U0[here]),2)); /* fidelity term */
+            }
+        }
+    }
+
+    switch (type) {
+        case 1: E_val[0] = gradEnergy + dataEnergy; break;
+        case 2: E_val[0] = gradEnergy; break;
+    }
+    return E_val[0];
+}
diff --git a/src/Core/regularisers_CPU/utils.h b/src/Core/regularisers_CPU/utils.h
new file mode 100644
index 0000000..cfaf6d7
--- /dev/null
+++ b/src/Core/regularisers_CPU/utils.h
@@ -0,0 +1,34 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <stdlib.h>
+#include <memory.h>
+#include "CCPiDefines.h"
+#include "omp.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+CCPI_EXPORT float copyIm(float *A, float *U, long dimX, long dimY, long dimZ);
+CCPI_EXPORT unsigned char copyIm_unchar(unsigned char *A, unsigned char *U, int dimX, int dimY, int dimZ);
+CCPI_EXPORT float copyIm_roll(float *A, float *U, int dimX, int dimY, int roll_value, int switcher);
+CCPI_EXPORT float TV_energy2D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY);
+CCPI_EXPORT float TV_energy3D(float *U, float *U0, float *E_val, float lambda, int type, int dimX, int dimY, int dimZ);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
new file mode 100644
index 0000000..a4dbe70
--- /dev/null
+++ b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.cu
@@ -0,0 +1,268 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "Diffus_4thO_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma)
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for explicit scheme
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+#define EPS 1.0e-7
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* Per-pixel weighted Laplacian of the 2D image U0, written to W_Lapl.
+ * Each thread handles the pixel (i, j) derived from its block/thread indices
+ * and computes W_Lapl = c^2*V_norm + c*V_orth with c = 1/(1 + |grad|^2/sigma).
+ * Threads whose (i, j) falls outside dimX x dimY do nothing.
+ * NOTE(review): with dimX == 1 or dimY == 1 the reflected neighbour index
+ * becomes -1 - confirm callers never pass such sizes. */
+__global__ void Weighted_Laplc2D_kernel(float *W_Lapl, float *U0, float sigma, int dimX, int dimY)
+{
+ int i1,i2,j1,j2;
+ float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + dimX*j;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+
+ /* first derivatives: half the (previous - next) difference */
+ gradX = 0.5f*(U0[j*dimX+i2] - U0[j*dimX+i1]);
+ gradX_sq = powf(gradX,2);
+
+ gradY = 0.5f*(U0[j2*dimX+i] - U0[j1*dimX+i]);
+ gradY_sq = powf(gradY,2);
+
+ /* second derivatives */
+ gradXX = U0[j*dimX+i2] + U0[j*dimX+i1] - 2*U0[index];
+ gradYY = U0[j2*dimX+i] + U0[j1*dimX+i] - 2*U0[index];
+
+ /* mixed derivative from the four diagonal neighbours */
+ gradXY = 0.25f*(U0[j2*dimX+i2] + U0[j1*dimX+i1] - U0[j1*dimX+i2] - U0[j2*dimX+i1]);
+ xy_2 = 2.0f*gradX*gradY*gradXY;
+
+ denom = gradX_sq + gradY_sq;
+
+ /* guard against a vanishing gradient: divide by EPS instead of denom.
+    EPS is a double literal, so these two divisions are evaluated in double. */
+ if (denom <= EPS) {
+ V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/EPS;
+ V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/EPS;
+ }
+ else {
+ V_norm = (gradXX*gradX_sq + xy_2 + gradYY*gradY_sq)/denom;
+ V_orth = (gradXX*gradY_sq - xy_2 + gradYY*gradX_sq)/denom;
+ }
+
+ /* edge-stopping weight controlled by sigma */
+ c = 1.0f/(1.0f + denom/sigma);
+ c_sq = c*c;
+
+ W_Lapl[index] = c_sq*V_norm + c*V_orth;
+ }
+ return;
+}
+
+/* One explicit update of Output from the Laplacian of W_Lapl (2D):
+ *   Output += tau*(-lambdaPar*(W_xx + W_yy) - (Output - Input))
+ * Each thread handles one pixel; neighbours are reflected at the borders.
+ * sigmaPar2 is accepted but not used by this kernel. */
+__global__ void Diffusion_update_step2D_kernel(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, int dimX, int dimY)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* threads outside the image have nothing to do */
+    if ((col < 0) || (col >= dimX) || (row < 0) || (row >= dimY)) return;
+
+    const int centre = col + dimX*row;
+
+    /* boundary conditions (Neumann reflections) */
+    const int colNext = (col+1 == dimX) ? col-1 : col+1;
+    const int colPrev = (col-1 < 0) ? col+1 : col-1;
+    const int rowNext = (row+1 == dimY) ? row-1 : row+1;
+    const int rowPrev = (row-1 < 0) ? row+1 : row-1;
+
+    const float lapXX = W_Lapl[row*dimX+colPrev] + W_Lapl[row*dimX+colNext] - 2*W_Lapl[centre];
+    const float lapYY = W_Lapl[rowPrev*dimX+col] + W_Lapl[rowNext*dimX+col] - 2*W_Lapl[centre];
+
+    Output[centre] += tau*(-lambdaPar*(lapXX + lapYY) - (Output[centre] - Input[centre]));
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* Per-voxel weighted Laplacian of the 3D volume U0, written to W_Lapl.
+ * Each thread handles the voxel (i, j, k) derived from its block/thread
+ * indices and computes W_Lapl = c^2*V_norm + c*V_orth with
+ * c = 1/(1 + |grad|^2/sigma). Threads outside dimX x dimY x dimZ do nothing.
+ * NOTE(review): an extent of 1 along any axis makes the reflected neighbour
+ * index -1 - confirm callers never pass such sizes. */
+__global__ void Weighted_Laplc3D_kernel(float *W_Lapl, float *U0, float sigma, int dimX, int dimY, int dimZ)
+{
+ int i1,i2,j1,j2,k1,k2;
+ float gradX, gradX_sq, gradY, gradY_sq, gradXX, gradYY, gradXY, xy_2, denom, V_norm, V_orth, c, c_sq, gradZ, gradZ_sq, gradZZ, gradXZ, gradYZ, xyz_1, xyz_2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i+1; if (i1 == dimX) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ j1 = j+1; if (j1 == dimY) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ k1 = k+1; if (k1 == dimZ) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ /* first derivatives: half the (previous - next) difference along each axis */
+ gradX = 0.5f*(U0[(dimX*dimY)*k + j*dimX+i2] - U0[(dimX*dimY)*k + j*dimX+i1]);
+ gradX_sq = pow(gradX,2);
+
+ gradY = 0.5f*(U0[(dimX*dimY)*k + j2*dimX+i] - U0[(dimX*dimY)*k + j1*dimX+i]);
+ gradY_sq = pow(gradY,2);
+
+ gradZ = 0.5f*(U0[(dimX*dimY)*k2 + j*dimX+i] - U0[(dimX*dimY)*k1 + j*dimX+i]);
+ gradZ_sq = pow(gradZ,2);
+
+ /* second derivatives */
+ gradXX = U0[(dimX*dimY)*k + j*dimX+i2] + U0[(dimX*dimY)*k + j*dimX+i1] - 2*U0[index];
+ gradYY = U0[(dimX*dimY)*k + j2*dimX+i] + U0[(dimX*dimY)*k + j1*dimX+i] - 2*U0[index];
+ gradZZ = U0[(dimX*dimY)*k2 + j*dimX+i] + U0[(dimX*dimY)*k1 + j*dimX+i] - 2*U0[index];
+
+ /* mixed derivatives from the diagonal neighbours in each coordinate plane */
+ gradXY = 0.25f*(U0[(dimX*dimY)*k + j2*dimX+i2] + U0[(dimX*dimY)*k + j1*dimX+i1] - U0[(dimX*dimY)*k + j1*dimX+i2] - U0[(dimX*dimY)*k + j2*dimX+i1]);
+ gradXZ = 0.25f*(U0[(dimX*dimY)*k2 + j*dimX+i2] - U0[(dimX*dimY)*k2+j*dimX+i1] - U0[(dimX*dimY)*k1+j*dimX+i2] + U0[(dimX*dimY)*k1+j*dimX+i1]);
+ gradYZ = 0.25f*(U0[(dimX*dimY)*k2 +j2*dimX+i] - U0[(dimX*dimY)*k2+j1*dimX+i] - U0[(dimX*dimY)*k1+j2*dimX+i] + U0[(dimX*dimY)*k1+j1*dimX+i]);
+
+ xy_2 = 2.0f*gradX*gradY*gradXY;
+ xyz_1 = 2.0f*gradX*gradZ*gradXZ;
+ xyz_2 = 2.0f*gradY*gradZ*gradYZ;
+
+ denom = gradX_sq + gradY_sq + gradZ_sq;
+
+ /* guard against a vanishing gradient: divide by EPS instead of denom.
+    EPS is a double literal, so these two divisions are evaluated in double. */
+ if (denom <= EPS) {
+ V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/EPS;
+ V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/EPS;
+ }
+ else {
+ V_norm = (gradXX*gradX_sq + gradYY*gradY_sq + gradZZ*gradZ_sq + xy_2 + xyz_1 + xyz_2)/denom;
+ V_orth = ((gradY_sq + gradZ_sq)*gradXX + (gradX_sq + gradZ_sq)*gradYY + (gradX_sq + gradY_sq)*gradZZ - xy_2 - xyz_1 - xyz_2)/denom;
+ }
+
+ /* edge-stopping weight controlled by sigma */
+ c = 1.0f/(1.0f + denom/sigma);
+ c_sq = c*c;
+
+ W_Lapl[index] = c_sq*V_norm + c*V_orth;
+ }
+ return;
+}
+/* Explicit update step of the 3D fourth-order diffusion scheme.
+ * Each thread updates one voxel of Output in place from the Laplacian of W_Lapl and
+ * the data-fidelity difference (Output - Input). sigmaPar2 is accepted but not used here.
+ */
+__global__ void Diffusion_update_step3D_kernel(float *Output, float *Input, float *W_Lapl, float lambdaPar, float sigmaPar2, float tau, int dimX, int dimY, int dimZ)
+{
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
+ const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ /* threads outside the volume have nothing to do */
+ if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+ /* neighbour coordinates, reflected at the faces of the volume (Neumann) */
+ const int iNext = (i+1 == dimX) ? i-1 : i+1;
+ const int iPrev = (i-1 < 0) ? i+1 : i-1;
+ const int jNext = (j+1 == dimY) ? j-1 : j+1;
+ const int jPrev = (j-1 < 0) ? j+1 : j-1;
+ const int kNext = (k+1 == dimZ) ? k-1 : k+1;
+ const int kPrev = (k-1 < 0) ? k+1 : k-1;
+
+ const int slice = dimX*dimY;
+ const int index = slice*k + j*dimX+i;
+
+ /* twice the centre value, shared by the three second differences */
+ const float centre2 = 2*W_Lapl[index];
+
+ const float lapXX = W_Lapl[slice*k + j*dimX+iPrev] + W_Lapl[slice*k + j*dimX+iNext] - centre2;
+ const float lapYY = W_Lapl[slice*k + jPrev*dimX+i] + W_Lapl[slice*k + jNext*dimX+i] - centre2;
+ const float lapZZ = W_Lapl[slice*kPrev + j*dimX+i] + W_Lapl[slice*kNext + j*dimX+i] - centre2;
+
+ Output[index] += tau*(-lambdaPar*(lapXX + lapYY + lapZZ) - (Output[index] - Input[index]));
+ return;
+}
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z)
+{
+ /* Host driver of the fourth-order diffusion regulariser.
+ * Copies Input (N*M*Z floats) to the device, runs iterationsNumb explicit iterations and copies
+ * the result into Output. Z == 1 selects the 2D kernels, any other Z the 3D kernels.
+ * The kernels receive sigmaPar squared. Always returns 0; every CUDA runtime call goes through
+ * the CHECK macro (defined outside this function - presumably it reports CUDA errors, TODO confirm).
+ */
+ const int dev = 0;
+ const float sigmaPar2 = sigmaPar*sigmaPar;
+ const int dimTotal = N*M*Z;
+ const size_t volBytes = dimTotal*sizeof(float);
+ float *d_input, *d_output, *d_W_Lapl;
+
+ CHECK(cudaSetDevice(dev));
+
+ /* device buffers: noisy data, running estimate and weighted Laplacian */
+ CHECK(cudaMalloc((void**)&d_input,volBytes));
+ CHECK(cudaMalloc((void**)&d_output,volBytes));
+ CHECK(cudaMalloc((void**)&d_W_Lapl,volBytes));
+
+ /* the running estimate starts from the noisy data */
+ CHECK(cudaMemcpy(d_input,Input,volBytes,cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_output,Input,volBytes,cudaMemcpyHostToDevice));
+
+ int iter = 0;
+ if (Z == 1) {
+ /* 2D image */
+ dim3 threads2D(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 blocks2D(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+
+ while (iter < iterationsNumb) {
+ /* weighted Laplacian of the current estimate */
+ Weighted_Laplc2D_kernel<<<blocks2D,threads2D>>>(d_W_Lapl, d_output, sigmaPar2, N, M);
+ CHECK(cudaDeviceSynchronize());
+ /* explicit update of the estimate */
+ Diffusion_update_step2D_kernel<<<blocks2D,threads2D>>>(d_output, d_input, d_W_Lapl, lambdaPar, sigmaPar2, tau, N, M);
+ CHECK(cudaDeviceSynchronize());
+ iter++;
+ }
+ }
+ else {
+ /* 3D volume */
+ dim3 threads3D(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+ dim3 blocks3D(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
+
+ while (iter < iterationsNumb) {
+ /* weighted Laplacian of the current estimate */
+ Weighted_Laplc3D_kernel<<<blocks3D,threads3D>>>(d_W_Lapl, d_output, sigmaPar2, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ /* explicit update of the estimate */
+ Diffusion_update_step3D_kernel<<<blocks3D,threads3D>>>(d_output, d_input, d_W_Lapl, lambdaPar, sigmaPar2, tau, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ iter++;
+ }
+ }
+
+ /* fetch the result and release the device buffers */
+ CHECK(cudaMemcpy(Output,d_output,volBytes,cudaMemcpyDeviceToHost));
+ CHECK(cudaFree(d_input));
+ CHECK(cudaFree(d_output));
+ CHECK(cudaFree(d_W_Lapl));
+ return 0;
+}
diff --git a/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
new file mode 100644
index 0000000..77d5d79
--- /dev/null
+++ b/src/Core/regularisers_GPU/Diffus_4thO_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __Diff_4thO_GPU_H__
+#define __Diff_4thO_GPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* GPU fourth-order diffusion regulariser (host entry point).
+ * Input/Output are host arrays of N*M*Z floats; Z == 1 selects the 2D kernels.
+ * sigmaPar is squared internally before being passed to the kernels. Returns 0.
+ */
+extern "C" CCPI_EXPORT int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+
+#endif
diff --git a/src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
new file mode 100644
index 0000000..87871be
--- /dev/null
+++ b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.cu
@@ -0,0 +1,473 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "LLT_ROF_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+ *
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
+* lambdaLLT starting with smaller values.
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. tau - time-marching step
+* 5. iter - iterations number (for both models)
+*
+* Output:
+* Filtered/regularised image
+*
+* References:
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+/* thread-block dimensions used for the 3D kernel launches */
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+/* thread-block dimensions used for the 2D kernel launches */
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+
+/* EPS_LLT is added to |second derivative| in the LLT denominators, EPS_ROF under the square root
+ * of the ROF denominators; both are double-precision literals */
+#define EPS_LLT 0.01
+#define EPS_ROF 1.0e-12
+
+/* integer division rounded up: number of blocks of size b needed to cover a elements */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+
+/* NOTE: these macros evaluate their arguments more than once - pass side-effect-free expressions only */
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Sign of x: 1 for positive, -1 for negative, 0 otherwise. */
+__host__ __device__ int signLLT (float x)
+{
+ if (x > 0) return 1;
+ if (x < 0) return -1;
+ return 0;
+}
+
+/*************************************************************************/
+/**********************LLT-related functions *****************************/
+/*************************************************************************/
+/* LLT term (2D): for every pixel stores the second differences of U along x and y,
+ * each divided by (its absolute value + EPS_LLT), in D1 and D2 respectively. */
+__global__ void der2D_LLT_kernel(float *U, float *D1, float *D2, int dimX, int dimY)
+ {
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ /* threads outside the image have nothing to do */
+ if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+ /* neighbour coordinates, reflected at the image border (Neumann) */
+ const int iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+ const int iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+ const int jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+ const int jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+
+ const int index = i + dimX*j;
+ const float centre2 = 2.0f*U[index];
+
+ /* second differences along x and y */
+ const float d2x = U[j*dimX+iNext] - centre2 + U[j*dimX+iPrev];
+ const float d2y = U[jNext*dimX+i] - centre2 + U[jPrev*dimX+i];
+
+ /* regularised magnitudes, rounded to float before the division */
+ const float mag_x = abs(d2x) + EPS_LLT;
+ const float mag_y = abs(d2y) + EPS_LLT;
+
+ D1[index] = d2x / mag_x;
+ D2[index] = d2y / mag_y;
+ }
+
+/* LLT term (3D): for every voxel stores the second differences of U along x, y and z,
+ * each divided by (its absolute value + EPS_LLT), in D1, D2 and D3 respectively. */
+__global__ void der3D_LLT_kernel(float* U, float *D1, float *D2, float *D3, int dimX, int dimY, int dimZ)
+ {
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
+ const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ /* threads outside the volume have nothing to do */
+ if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+ /* neighbour coordinates, reflected at the faces of the volume (Neumann) */
+ const int iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+ const int iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+ const int jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+ const int jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+ const int kNext = (k + 1 == dimZ) ? k - 1 : k + 1;
+ const int kPrev = (k - 1 < 0) ? k + 1 : k - 1;
+
+ const int slice = dimX*dimY;
+ const int index = slice*k + j*dimX+i;
+ const float centre2 = 2.0f*U[index];
+
+ /* second differences along x, y and z */
+ const float d2x = U[slice*k + j*dimX+iNext] - centre2 + U[slice*k + j*dimX+iPrev];
+ const float d2y = U[slice*k + jNext*dimX+i] - centre2 + U[slice*k + jPrev*dimX+i];
+ const float d2z = U[slice*kNext + j*dimX+i] - centre2 + U[slice*kPrev + j*dimX+i];
+
+ /* regularised magnitudes, rounded to float before the division */
+ const float mag_x = abs(d2x) + EPS_LLT;
+ const float mag_y = abs(d2y) + EPS_LLT;
+ const float mag_z = abs(d2z) + EPS_LLT;
+
+ D1[index] = d2x / mag_x;
+ D2[index] = d2y / mag_y;
+ D3[index] = d2z / mag_z;
+ }
+
+/*************************************************************************/
+/**********************ROF-related functions *****************************/
+/*************************************************************************/
+
+/* ROF-TV first-order difference term D1 (2D).
+ * For every pixel writes D1 = NOMx_1/sqrt(NOMx_1^2 + m^2 + EPS_ROF), where NOMx_1 is the forward
+ * difference along j and m is the minmod of the forward and backward differences along i.
+ * Input - current image estimate (read only); D1 - output array; N, M - image size (N is the fastest-varying dimension).
+ */
+__global__ void D1_func2D_ROF_kernel(float* Input, float* D1, int N, int M)
+ {
+ int i1, j1, i2;
+ float NOMx_1,NOMy_1,NOMy_0,denom1,denom2,T1;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ /* threads outside the image do nothing */
+ if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i + 1; if (i1 >= N) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= M) j1 = j-1;
+
+ /* Forward-backward differences (the "x" differences step along j, the "y" differences along i) */
+ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+ NOMy_0 = Input[index] - Input[j*N + i2]; /* y- */
+
+ denom1 = NOMx_1*NOMx_1;
+ /* minmod of the two "y" differences: zero when their signs differ, otherwise the one of smaller magnitude */
+ denom2 = 0.5f*(signLLT((float)NOMy_1) + signLLT((float)NOMy_0))*(MIN(abs((float)NOMy_1),abs((float)NOMy_0)));
+ denom2 = denom2*denom2;
+ /* EPS_ROF keeps the square root strictly positive */
+ T1 = sqrt(denom1 + denom2 + EPS_ROF);
+ D1[index] = NOMx_1/T1;
+ }
+ }
+
+/* ROF-TV first-order difference term D2 (2D).
+ * For every pixel writes D2 = NOMy_1/sqrt(NOMy_1^2 + m^2 + EPS_ROF), where NOMy_1 is the forward
+ * difference along i and m is the minmod of the forward and backward differences along j.
+ * Input - current image estimate (read only); D2 - output array; N, M - image size (N is the fastest-varying dimension).
+ */
+__global__ void D2_func2D_ROF_kernel(float* Input, float* D2, int N, int M)
+ {
+ int i1, j1, j2;
+ float NOMx_1,NOMy_1,NOMx_0,denom1,denom2,T2;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ /* threads outside the image do nothing */
+ if ((i >= 0) && (i < (N)) && (j >= 0) && (j < (M))) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i + 1; if (i1 >= N) i1 = i-1;
+ j1 = j + 1; if (j1 >= M) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+
+ /* Forward-backward differences (the "x" differences step along j, the "y" differences along i) */
+ NOMx_1 = Input[j1*N + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[j*N + i1] - Input[index]; /* y+ */
+ NOMx_0 = Input[index] - Input[j2*N + i]; /* x- */
+
+ denom1 = NOMy_1*NOMy_1;
+ /* minmod of the two "x" differences: zero when their signs differ, otherwise the one of smaller magnitude */
+ denom2 = 0.5f*(signLLT((float)NOMx_1) + signLLT((float)NOMx_0))*(MIN(abs((float)NOMx_1),abs((float)NOMx_0)));
+ denom2 = denom2*denom2;
+ /* EPS_ROF keeps the square root strictly positive */
+ T2 = sqrt(denom1 + denom2 + EPS_ROF);
+ D2[index] = NOMy_1/T2;
+ }
+ }
+
+
+ /* ROF-TV first-order difference term D1 (3D).
+ * For every voxel writes D1 = NOMx_1/sqrt(NOMx_1^2 + my^2 + mz^2 + EPS_ROF), where NOMx_1 is the forward
+ * difference along j and my, mz are the minmods of the forward/backward differences along i and k.
+ * Input - current volume estimate (read only); D1 - output array; dimX is the fastest-varying dimension.
+ */
+__global__ void D1_func3D_ROF_kernel(float* Input, float* D1, int dimX, int dimY, int dimZ)
+ {
+ float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2,denom3, T1;
+ int i1,i2,k1,j1,j2,k2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ /* threads outside the volume do nothing */
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ /* symmetric boundary conditions (Neumann); j2 is computed but not used by this kernel */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+ /* Forward-backward differences (the "x" differences step along j, the "y" differences along i) */
+ NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */
+ NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+
+ NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+ NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+
+
+ denom1 = NOMx_1*NOMx_1;
+ /* minmod of the "y" and of the "z" differences (the 0.5 literal makes these products double precision) */
+ denom2 = 0.5*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+ denom2 = denom2*denom2;
+ denom3 = 0.5*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+ denom3 = denom3*denom3;
+ /* EPS_ROF keeps the square root strictly positive */
+ T1 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
+ D1[index] = NOMx_1/T1;
+ }
+ }
+
+ /* ROF-TV first-order difference term D2 (3D).
+ * For every voxel writes D2 = NOMy_1/sqrt(NOMy_1^2 + mx^2 + mz^2 + EPS_ROF), where NOMy_1 is the forward
+ * difference along i and mx, mz are the minmods of the forward/backward differences along j and k.
+ * Input - current volume estimate (read only); D2 - output array; dimX is the fastest-varying dimension.
+ */
+ __global__ void D2_func3D_ROF_kernel(float* Input, float* D2, int dimX, int dimY, int dimZ)
+ {
+ float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+ int i1,i2,k1,j1,j2,k2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ /* threads outside the volume do nothing */
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+ /* symmetric boundary conditions (Neumann); i2 is computed but not used by this kernel */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+
+ /* Forward-backward differences (the "x" differences step along j, the "y" differences along i) */
+ NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+ NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+ NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+ NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+
+
+ denom1 = NOMy_1*NOMy_1;
+ /* minmod of the "x" and of the "z" differences (the 0.5 literal makes these products double precision) */
+ denom2 = 0.5*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+ denom2 = denom2*denom2;
+ denom3 = 0.5*(signLLT(NOMz_1) + signLLT(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+ denom3 = denom3*denom3;
+ /* EPS_ROF keeps the square root strictly positive */
+ T2 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
+ D2[index] = NOMy_1/T2;
+ }
+ }
+
+ /* ROF-TV first-order difference term D3 (3D).
+ * For every voxel writes D3 = NOMz_1/sqrt(NOMz_1^2 + mx^2 + my^2 + EPS_ROF), where NOMz_1 is the forward
+ * difference along k and mx, my are the minmods of the forward/backward differences along j and i.
+ * Input - current volume estimate (read only); D3 - output array; dimX is the fastest-varying dimension.
+ */
+ __global__ void D3_func3D_ROF_kernel(float* Input, float* D3, int dimX, int dimY, int dimZ)
+ {
+ float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+ int i1,i2,k1,j1,j2,k2;
+
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (dimX*dimY)*k + j*dimX+i;
+
+ /* threads outside the volume do nothing */
+ if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+ /* symmetric boundary conditions (Neumann); k2 is computed but not used by this kernel */
+ i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+ i2 = i - 1; if (i2 < 0) i2 = i+1;
+ j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+ j2 = j - 1; if (j2 < 0) j2 = j+1;
+ k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+ k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+ /* Forward-backward differences (the "x" differences step along j, the "y" differences along i) */
+ NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+ NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+ NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+ NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+ NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+
+ denom1 = NOMz_1*NOMz_1;
+ /* minmod of the "x" and of the "y" differences (the 0.5 literal makes these products double precision) */
+ denom2 = 0.5*(signLLT(NOMx_1) + signLLT(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+ denom2 = denom2*denom2;
+ denom3 = 0.5*(signLLT(NOMy_1) + signLLT(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+ denom3 = denom3*denom3;
+ /* EPS_ROF keeps the square root strictly positive */
+ T3 = sqrt(denom1 + denom2 + denom3 + EPS_ROF);
+ D3[index] = NOMz_1/T3;
+ }
+ }
+/*************************************************************************/
+/**********************ROF-LLT-related functions *************************/
+/*************************************************************************/
+
+/* Joint LLT-ROF update (2D): each thread updates one pixel of U in place using the Laplacian built
+ * from the LLT terms, the divergence built from the ROF terms and the fidelity difference (U - U0). */
+__global__ void Update2D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D1_ROF, float *D2_ROF, float lambdaROF, float lambdaLLT, float tau, int dimX, int dimY)
+{
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ /* threads outside the image have nothing to do */
+ if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY)) return;
+
+ /* neighbour coordinates, reflected at the image border (Neumann) */
+ const int iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+ const int iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+ const int jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+ const int jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+
+ const int index = j*dimX+i;
+
+ /* LLT part: second differences of D1_LLT along x and of D2_LLT along y form the Laplacian */
+ const float lltXX = D1_LLT[j*dimX+iNext] - 2.0f*D1_LLT[index] + D1_LLT[j*dimX+iPrev];
+ const float lltYY = D2_LLT[jNext*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[jPrev*dimX+i];
+ const float laplacian = lltXX + lltYY;
+
+ /* ROF part: backward differences of D1_ROF and D2_ROF form the divergence */
+ const float back1 = D1_ROF[index] - D1_ROF[jPrev*dimX + i];
+ const float back2 = D2_ROF[index] - D2_ROF[j*dimX + iPrev];
+ const float divergence = back1 + back2;
+
+ /* explicit step on the combined cost function */
+ U[index] += tau*(2.0f*lambdaROF*(divergence) - lambdaLLT*(laplacian) - (U[index] - U0[index]));
+}
+
+/* Joint LLT-ROF update (3D): each thread updates one voxel of U in place using the Laplacian built
+ * from the LLT terms, the divergence built from the ROF terms and the fidelity difference (U - U0). */
+__global__ void Update3D_LLT_ROF_kernel(float *U0, float *U, float *D1_LLT, float *D2_LLT, float *D3_LLT, float *D1_ROF, float *D2_ROF, float *D3_ROF, float lambdaROF, float lambdaLLT, float tau, int dimX, int dimY, int dimZ)
+{
+ const int i = blockDim.x * blockIdx.x + threadIdx.x;
+ const int j = blockDim.y * blockIdx.y + threadIdx.y;
+ const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ /* threads outside the volume have nothing to do */
+ if ((i < 0) || (i >= dimX) || (j < 0) || (j >= dimY) || (k < 0) || (k >= dimZ)) return;
+
+ /* neighbour coordinates, reflected at the faces of the volume (Neumann) */
+ const int iNext = (i + 1 == dimX) ? i - 1 : i + 1;
+ const int iPrev = (i - 1 < 0) ? i + 1 : i - 1;
+ const int jNext = (j + 1 == dimY) ? j - 1 : j + 1;
+ const int jPrev = (j - 1 < 0) ? j + 1 : j - 1;
+ const int kNext = (k + 1 == dimZ) ? k - 1 : k + 1;
+ const int kPrev = (k - 1 < 0) ? k + 1 : k - 1;
+
+ const int slice = dimX*dimY;
+ const int index = slice*k + j*dimX+i;
+
+ /* LLT part: second differences of the three LLT terms form the Laplacian */
+ const float lltXX = D1_LLT[slice*k + j*dimX+iNext] - 2.0f*D1_LLT[index] + D1_LLT[slice*k + j*dimX+iPrev];
+ const float lltYY = D2_LLT[slice*k + jNext*dimX+i] - 2.0f*D2_LLT[index] + D2_LLT[slice*k + jPrev*dimX+i];
+ const float lltZZ = D3_LLT[slice*kNext + j*dimX+i] - 2.0f*D3_LLT[index] + D3_LLT[slice*kPrev + j*dimX+i];
+ const float laplacian = lltXX + lltYY + lltZZ;
+
+ /* ROF part: backward differences of the three ROF terms form the divergence */
+ const float back1 = D1_ROF[index] - D1_ROF[slice*k + jPrev*dimX+i];
+ const float back2 = D2_ROF[index] - D2_ROF[slice*k + j*dimX+iPrev];
+ const float back3 = D3_ROF[index] - D3_ROF[slice*kPrev + j*dimX+i];
+ const float divergence = back1 + back2 + back3;
+
+ /* explicit step on the combined cost function */
+ U[index] += tau*(2.0f*lambdaROF*(divergence) - lambdaLLT*(laplacian) - (U[index] - U0[index]));
+}
+
+/*******************************************************************/
+/************************ HOST FUNCTION ****************************/
+/*******************************************************************/
+
+extern "C" int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z)
+{
+ /* Host driver of the combined LLT-ROF regulariser.
+ * Input - host array with the noisy image/volume (N*M*Z floats)
+ * Output - host array receiving the regularised result (same size)
+ * lambdaROF, lambdaLLT - regularisation parameters of the ROF and LLT terms
+ * iterationsNumb - number of explicit iterations
+ * tau - time-marching step
+ * N, M, Z - dimensions; Z == 0 is treated as Z == 1, and Z == 1 selects the 2D kernels
+ * Always returns 0; every CUDA runtime call goes through the CHECK macro
+ * (defined outside this function - presumably it reports CUDA errors, TODO confirm).
+ */
+ int dev = 0;
+ CHECK(cudaSetDevice(dev));
+
+ /* normalise Z BEFORE sizing the buffers: with Z == 0 the element count would otherwise be zero
+ * while the 2D kernels still write N*M elements */
+ if (Z == 0) {Z = 1;}
+ const bool is3D = (Z != 1);
+ const int DimTotal = N*M*Z;
+ const size_t volBytes = DimTotal*sizeof(float);
+
+ float *d_input=NULL, *d_update=NULL;
+ float *D1_LLT=NULL, *D2_LLT=NULL, *D3_LLT=NULL, *D1_ROF=NULL, *D2_ROF=NULL, *D3_ROF=NULL;
+
+ CHECK(cudaMalloc((void**)&d_input,volBytes));
+ CHECK(cudaMalloc((void**)&d_update,volBytes));
+
+ CHECK(cudaMalloc((void**)&D1_LLT,volBytes));
+ CHECK(cudaMalloc((void**)&D2_LLT,volBytes));
+
+ CHECK(cudaMalloc((void**)&D1_ROF,volBytes));
+ CHECK(cudaMalloc((void**)&D2_ROF,volBytes));
+
+ /* the third-direction buffers are only touched by the 3D kernels */
+ if (is3D) {
+ CHECK(cudaMalloc((void**)&D3_LLT,volBytes));
+ CHECK(cudaMalloc((void**)&D3_ROF,volBytes));
+ }
+
+ /* the running estimate starts from the noisy data */
+ CHECK(cudaMemcpy(d_input,Input,volBytes,cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_update,Input,volBytes,cudaMemcpyHostToDevice));
+
+ if (!is3D) {
+ // 2D case
+ dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+
+ for(int n=0; n < iterationsNumb; n++) {
+ /****************ROF******************/
+ /* calculate first-order differences */
+ D1_func2D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D1_ROF, N, M);
+ CHECK(cudaDeviceSynchronize());
+ D2_func2D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D2_ROF, N, M);
+ CHECK(cudaDeviceSynchronize());
+ /****************LLT******************/
+ /* estimate second-order derivatives */
+ der2D_LLT_kernel<<<dimGrid,dimBlock>>>(d_update, D1_LLT, D2_LLT, N, M);
+ /* Joint update for ROF and LLT models (launched on the same stream, so it runs after der2D_LLT_kernel) */
+ Update2D_LLT_ROF_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, D1_LLT, D2_LLT, D1_ROF, D2_ROF, lambdaROF, lambdaLLT, tau, N, M);
+ CHECK(cudaDeviceSynchronize());
+ }
+ }
+ else {
+ // 3D case
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+ /* the grid's z extent must be derived from the block's z size */
+ dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
+
+ for(int n=0; n < iterationsNumb; n++) {
+ /****************ROF******************/
+ /* calculate first-order differences */
+ D1_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D1_ROF, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ D2_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D2_ROF, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ D3_func3D_ROF_kernel<<<dimGrid,dimBlock>>>(d_update, D3_ROF, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ /****************LLT******************/
+ /* estimate second-order derivatives */
+ der3D_LLT_kernel<<<dimGrid,dimBlock>>>(d_update, D1_LLT, D2_LLT, D3_LLT, N, M, Z);
+ /* Joint update for ROF and LLT models (launched on the same stream, so it runs after der3D_LLT_kernel) */
+ Update3D_LLT_ROF_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, D1_LLT, D2_LLT, D3_LLT, D1_ROF, D2_ROF, D3_ROF, lambdaROF, lambdaLLT, tau, N, M, Z);
+ CHECK(cudaDeviceSynchronize());
+ }
+ }
+
+ /* fetch the result and release the device buffers */
+ CHECK(cudaMemcpy(Output,d_update,volBytes,cudaMemcpyDeviceToHost));
+ CHECK(cudaFree(d_input));
+ CHECK(cudaFree(d_update));
+ CHECK(cudaFree(D1_LLT));
+ CHECK(cudaFree(D2_LLT));
+ CHECK(cudaFree(D1_ROF));
+ CHECK(cudaFree(D2_ROF));
+ if (is3D) {
+ CHECK(cudaFree(D3_LLT));
+ CHECK(cudaFree(D3_ROF));
+ }
+ return 0;
+}
diff --git a/src/Core/regularisers_GPU/LLT_ROF_GPU_core.h b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.h
new file mode 100644
index 0000000..a6bfcc7
--- /dev/null
+++ b/src/Core/regularisers_GPU/LLT_ROF_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __ROFLLTGPU_H__
+#define __ROFLLTGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* GPU LLT-ROF regulariser (host entry point).
+ * Input/Output are host arrays of N*M*Z floats; Z == 1 selects the 2D kernels. Returns 0.
+ */
+extern "C" CCPI_EXPORT int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+
+#endif
diff --git a/src/Core/regularisers_GPU/NonlDiff_GPU_core.cu b/src/Core/regularisers_GPU/NonlDiff_GPU_core.cu
new file mode 100644
index 0000000..ff7ce4d
--- /dev/null
+++ b/src/Core/regularisers_GPU/NonlDiff_GPU_core.cu
@@ -0,0 +1,345 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "NonlDiff_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma); when sigma equals zero, nonlinear diffusion reduces to linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+
+/* thread-block dimensions used for the 3D kernel launches */
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+/* thread-block dimensions used for the 2D kernel launches */
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+/* NOTE(review): EPS, MAX and MIN do not appear to be referenced by the kernels or host function in this file */
+#define EPS 1.0e-5
+
+/* integer division rounded up: number of blocks of size b needed to cover a elements */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+
+/* NOTE: these macros evaluate their arguments more than once - pass side-effect-free expressions only */
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Sign of x: 1 for positive, -1 for negative, 0 otherwise. */
+__host__ __device__ int signNDF (float x)
+{
+ if (x > 0) return 1;
+ if (x < 0) return -1;
+ return 0;
+}
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* One explicit step of linear diffusion (heat equation) on a 2D image.
+ * Input - noisy image used in the data-fidelity term (read only)
+ * Output - current estimate, updated in place (one pixel per thread)
+ * lambdaPar - regularisation parameter, tau - time-marching step
+ * N, M - image size (N is the fastest-varying dimension)
+ * NOTE(review): neighbour values are read from Output while other threads of the same launch write
+ * their own Output element, with no synchronisation in between, so a thread may see either the old or
+ * the already updated neighbour - confirm this is acceptable or switch to a separate output buffer.
+ */
+__global__ void LinearDiff2D_kernel(float *Input, float *Output, float lambdaPar, float tau, int N, int M)
+ {
+ int i1,i2,j1,j2;
+ float e,w,n,s,e1,w1,n1,s1;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ /* threads outside the image do nothing */
+ if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i+1; if (i1 == N) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ j1 = j+1; if (j1 == M) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+
+ /* the four direct neighbours: e/w step along i, n/s step along j */
+ e = Output[j*N+i1];
+ w = Output[j*N+i2];
+ n = Output[j1*N+i];
+ s = Output[j2*N+i];
+
+ /* differences between each neighbour and the centre pixel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+
+ /* the sum of the four differences is the discrete Laplacian */
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+ }
+ }
+
+ /* One explicit step of nonlinear diffusion on a 2D image.
+ * Input - noisy image used in the data-fidelity term (read only)
+ * Output - current estimate, updated in place (one pixel per thread)
+ * lambdaPar - regularisation parameter, tau - time-marching step
+ * sigmaPar - edge-preserving threshold (the host function in this file passes sigmaPar/sqrt(2))
+ * penaltytype - 1: Huber, 2: Perona-Malik, 3: Tukey Biweight; for any other value every in-range thread
+ * prints a message and the neighbour differences are used unmodified
+ * N, M - image size (N is the fastest-varying dimension)
+ * NOTE(review): neighbour values are read from Output while other threads of the same launch write
+ * their own Output element, with no synchronisation in between, so a thread may see either the old or
+ * the already updated neighbour - confirm this is acceptable or switch to a separate output buffer.
+ */
+ __global__ void NonLinearDiff2D_kernel(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, int N, int M)
+ {
+ int i1,i2,j1,j2;
+ float e,w,n,s,e1,w1,n1,s1;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = i + N*j;
+
+ /* threads outside the image do nothing */
+ if ((i >= 0) && (i < N) && (j >= 0) && (j < M)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i+1; if (i1 == N) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ j1 = j+1; if (j1 == M) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+
+ /* the four direct neighbours: e/w step along i, n/s step along j */
+ e = Output[j*N+i1];
+ w = Output[j*N+i2];
+ n = Output[j1*N+i];
+ s = Output[j2*N+i];
+
+ /* differences between each neighbour and the centre pixel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+
+ if (penaltytype == 1){
+ /* Huber penalty: sign of the difference above the threshold, difference/sigmaPar otherwise */
+ if (abs(e1) > sigmaPar) e1 = signNDF(e1);
+ else e1 = e1/sigmaPar;
+
+ if (abs(w1) > sigmaPar) w1 = signNDF(w1);
+ else w1 = w1/sigmaPar;
+
+ if (abs(n1) > sigmaPar) n1 = signNDF(n1);
+ else n1 = n1/sigmaPar;
+
+ if (abs(s1) > sigmaPar) s1 = signNDF(s1);
+ else s1 = s1/sigmaPar;
+ }
+ else if (penaltytype == 2) {
+ /* Perona-Malik: d/(1 + (d/sigmaPar)^2) */
+ e1 = (e1)/(1.0f + pow((e1/sigmaPar),2));
+ w1 = (w1)/(1.0f + pow((w1/sigmaPar),2));
+ n1 = (n1)/(1.0f + pow((n1/sigmaPar),2));
+ s1 = (s1)/(1.0f + pow((s1/sigmaPar),2));
+ }
+ else if (penaltytype == 3) {
+ /* Tukey Biweight: d*(1 - (d/sigmaPar)^2)^2 up to the threshold, zero above it */
+ if (abs(e1) <= sigmaPar) e1 = e1*pow((1.0f - pow((e1/sigmaPar),2)), 2);
+ else e1 = 0.0f;
+ if (abs(w1) <= sigmaPar) w1 = w1*pow((1.0f - pow((w1/sigmaPar),2)), 2);
+ else w1 = 0.0f;
+ if (abs(n1) <= sigmaPar) n1 = n1*pow((1.0f - pow((n1/sigmaPar),2)), 2);
+ else n1 = 0.0f;
+ if (abs(s1) <= sigmaPar) s1 = s1*pow((1.0f - pow((s1/sigmaPar),2)), 2);
+ else s1 = 0.0f;
+ }
+ else printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1) - (Output[index] - Input[index]));
+ }
+ }
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+
+/* One explicit step of linear diffusion (heat equation) on a 3D volume.
+ * Input - noisy volume used in the data-fidelity term (read only)
+ * Output - current estimate, updated in place (one voxel per thread)
+ * lambdaPar - regularisation parameter, tau - time-marching step
+ * N, M, Z - volume size (N is the fastest-varying dimension)
+ * NOTE(review): neighbour values are read from Output while other threads of the same launch write
+ * their own Output element, with no synchronisation in between, so a thread may see either the old or
+ * the already updated neighbour - confirm this is acceptable or switch to a separate output buffer.
+ */
+__global__ void LinearDiff3D_kernel(float *Input, float *Output, float lambdaPar, float tau, int N, int M, int Z)
+ {
+ int i1,i2,j1,j2,k1,k2;
+ float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ /* threads outside the volume do nothing */
+ if ((i >= 0) && (i < N) && (j >= 0) && (j < M) && (k >= 0) && (k < Z)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i+1; if (i1 == N) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ j1 = j+1; if (j1 == M) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ k1 = k+1; if (k1 == Z) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+
+ /* the six direct neighbours: e/w step along i, n/s along j, u/d along k */
+ e = Output[(N*M)*k + i1 + N*j];
+ w = Output[(N*M)*k + i2 + N*j];
+ n = Output[(N*M)*k + i + N*j1];
+ s = Output[(N*M)*k + i + N*j2];
+ u = Output[(N*M)*k1 + i + N*j];
+ d = Output[(N*M)*k2 + i + N*j];
+
+ /* differences between each neighbour and the centre voxel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+ u1 = u - Output[index];
+ d1 = d - Output[index];
+
+ /* the sum of the six differences is the discrete Laplacian */
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+ }
+ }
+
+/* One explicit step of nonlinear diffusion on a 3D volume.
+ * Input - noisy volume used in the data-fidelity term (read only)
+ * Output - current estimate, updated in place (one voxel per thread)
+ * lambdaPar - regularisation parameter, tau - time-marching step
+ * sigmaPar - edge-preserving threshold (the host function in this file passes sigmaPar/sqrt(2))
+ * penaltytype - 1: Huber, 2: Perona-Malik, 3: Tukey Biweight; for any other value every in-range thread
+ * prints a message and the neighbour differences are used unmodified
+ * N, M, Z - volume size (N is the fastest-varying dimension)
+ * NOTE(review): neighbour values are read from Output while other threads of the same launch write
+ * their own Output element, with no synchronisation in between, so a thread may see either the old or
+ * the already updated neighbour - confirm this is acceptable or switch to a separate output buffer.
+ */
+__global__ void NonLinearDiff3D_kernel(float *Input, float *Output, float lambdaPar, float sigmaPar, float tau, int penaltytype, int N, int M, int Z)
+ {
+ int i1,i2,j1,j2,k1,k2;
+ float e,w,n,s,u,d,e1,w1,n1,s1,u1,d1;
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+ int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+ int index = (N*M)*k + i + N*j;
+
+ /* threads outside the volume do nothing */
+ if ((i >= 0) && (i < N) && (j >= 0) && (j < M) && (k >= 0) && (k < Z)) {
+
+ /* boundary conditions (Neumann reflections) */
+ i1 = i+1; if (i1 == N) i1 = i-1;
+ i2 = i-1; if (i2 < 0) i2 = i+1;
+ j1 = j+1; if (j1 == M) j1 = j-1;
+ j2 = j-1; if (j2 < 0) j2 = j+1;
+ k1 = k+1; if (k1 == Z) k1 = k-1;
+ k2 = k-1; if (k2 < 0) k2 = k+1;
+
+ /* the six direct neighbours: e/w step along i, n/s along j, u/d along k */
+ e = Output[(N*M)*k + i1 + N*j];
+ w = Output[(N*M)*k + i2 + N*j];
+ n = Output[(N*M)*k + i + N*j1];
+ s = Output[(N*M)*k + i + N*j2];
+ u = Output[(N*M)*k1 + i + N*j];
+ d = Output[(N*M)*k2 + i + N*j];
+
+ /* differences between each neighbour and the centre voxel */
+ e1 = e - Output[index];
+ w1 = w - Output[index];
+ n1 = n - Output[index];
+ s1 = s - Output[index];
+ u1 = u - Output[index];
+ d1 = d - Output[index];
+
+
+ if (penaltytype == 1){
+ /* Huber penalty: sign of the difference above the threshold, difference/sigmaPar otherwise */
+ if (abs(e1) > sigmaPar) e1 = signNDF(e1);
+ else e1 = e1/sigmaPar;
+
+ if (abs(w1) > sigmaPar) w1 = signNDF(w1);
+ else w1 = w1/sigmaPar;
+
+ if (abs(n1) > sigmaPar) n1 = signNDF(n1);
+ else n1 = n1/sigmaPar;
+
+ if (abs(s1) > sigmaPar) s1 = signNDF(s1);
+ else s1 = s1/sigmaPar;
+
+ if (abs(u1) > sigmaPar) u1 = signNDF(u1);
+ else u1 = u1/sigmaPar;
+
+ if (abs(d1) > sigmaPar) d1 = signNDF(d1);
+ else d1 = d1/sigmaPar;
+ }
+ else if (penaltytype == 2) {
+ /* Perona-Malik: d/(1 + (d/sigmaPar)^2) */
+ e1 = (e1)/(1.0f + pow((e1/sigmaPar),2));
+ w1 = (w1)/(1.0f + pow((w1/sigmaPar),2));
+ n1 = (n1)/(1.0f + pow((n1/sigmaPar),2));
+ s1 = (s1)/(1.0f + pow((s1/sigmaPar),2));
+ u1 = (u1)/(1.0f + pow((u1/sigmaPar),2));
+ d1 = (d1)/(1.0f + pow((d1/sigmaPar),2));
+ }
+ else if (penaltytype == 3) {
+ /* Tukey Biweight: d*(1 - (d/sigmaPar)^2)^2 up to the threshold, zero above it */
+ if (abs(e1) <= sigmaPar) e1 = e1*pow((1.0f - pow((e1/sigmaPar),2)), 2);
+ else e1 = 0.0f;
+ if (abs(w1) <= sigmaPar) w1 = w1*pow((1.0f - pow((w1/sigmaPar),2)), 2);
+ else w1 = 0.0f;
+ if (abs(n1) <= sigmaPar) n1 = n1*pow((1.0f - pow((n1/sigmaPar),2)), 2);
+ else n1 = 0.0f;
+ if (abs(s1) <= sigmaPar) s1 = s1*pow((1.0f - pow((s1/sigmaPar),2)), 2);
+ else s1 = 0.0f;
+ if (abs(u1) <= sigmaPar) u1 = u1*pow((1.0f - pow((u1/sigmaPar),2)), 2);
+ else u1 = 0.0f;
+ if (abs(d1) <= sigmaPar) d1 = d1*pow((1.0f - pow((d1/sigmaPar),2)), 2);
+ else d1 = 0.0f;
+ }
+ else printf("%s \n", "No penalty function selected! Use 1,2 or 3.");
+
+ Output[index] += tau*(lambdaPar*(e1 + w1 + n1 + s1 + u1 + d1) - (Output[index] - Input[index]));
+ }
+ }
+
+/////////////////////////////////////////////////
+// HOST FUNCTION
+extern "C" int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z)
+{
+ /* Host driver of the linear/nonlinear diffusion regulariser.
+ * Copies Input (N*M*Z floats) to the device, runs iterationsNumb explicit iterations and copies
+ * the result into Output. Z == 1 selects the 2D kernels, any other Z the 3D kernels.
+ * sigmaPar == 0 selects linear diffusion; otherwise the nonlinear kernels receive sigmaPar/sqrt(2).
+ * Always returns 0; every CUDA runtime call goes through the CHECK macro
+ * (defined outside this function - presumably it reports CUDA errors, TODO confirm).
+ */
+ const int dev = 0;
+ CHECK(cudaSetDevice(dev));
+
+ const float sigmaPar2 = sigmaPar/sqrt(2.0f);
+ const bool useLinear = (sigmaPar == 0.0f);
+ const size_t volBytes = N*M*Z*sizeof(float);
+ float *d_input, *d_output;
+
+ CHECK(cudaMalloc((void**)&d_input,volBytes));
+ CHECK(cudaMalloc((void**)&d_output,volBytes));
+
+ /* the running estimate starts from the noisy data */
+ CHECK(cudaMemcpy(d_input,Input,volBytes,cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_output,Input,volBytes,cudaMemcpyHostToDevice));
+
+ int iter = 0;
+ if (Z == 1) {
+ /* 2D image */
+ dim3 threads2D(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 blocks2D(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+
+ while (iter < iterationsNumb) {
+ if (useLinear) {
+ /* heat equation */
+ LinearDiff2D_kernel<<<blocks2D,threads2D>>>(d_input, d_output, lambdaPar, tau, N, M);
+ }
+ else {
+ /* edge-preserving diffusion */
+ NonLinearDiff2D_kernel<<<blocks2D,threads2D>>>(d_input, d_output, lambdaPar, sigmaPar2, tau, penaltytype, N, M);
+ }
+ CHECK(cudaDeviceSynchronize());
+ iter++;
+ }
+ }
+ else {
+ /* 3D volume */
+ dim3 threads3D(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+ dim3 blocks3D(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
+
+ while (iter < iterationsNumb) {
+ if (useLinear) {
+ /* heat equation */
+ LinearDiff3D_kernel<<<blocks3D,threads3D>>>(d_input, d_output, lambdaPar, tau, N, M, Z);
+ }
+ else {
+ /* edge-preserving diffusion */
+ NonLinearDiff3D_kernel<<<blocks3D,threads3D>>>(d_input, d_output, lambdaPar, sigmaPar2, tau, penaltytype, N, M, Z);
+ }
+ CHECK(cudaDeviceSynchronize());
+ iter++;
+ }
+ }
+
+ /* fetch the result and release the device buffers */
+ CHECK(cudaMemcpy(Output,d_output,volBytes,cudaMemcpyDeviceToHost));
+ CHECK(cudaFree(d_input));
+ CHECK(cudaFree(d_output));
+ return 0;
+}
diff --git a/src/Core/regularisers_GPU/NonlDiff_GPU_core.h b/src/Core/regularisers_GPU/NonlDiff_GPU_core.h
new file mode 100644
index 0000000..5fe457e
--- /dev/null
+++ b/src/Core/regularisers_GPU/NonlDiff_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __NonlDiffGPU_H__
+#define __NonlDiffGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* GPU linear (sigmaPar == 0) or nonlinear diffusion of a 2D (Z == 1) or 3D volume of N*M*Z floats;
+ * reads Input, writes the result to Output and returns 0. */
+extern "C" CCPI_EXPORT int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+
+#endif
diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
new file mode 100644
index 0000000..98c8488
--- /dev/null
+++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu
@@ -0,0 +1,460 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "PatchSelect_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D grayscale image (classical 3D version will not be supported but rather 2D + dim extension (TODO))
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ */
+
+
+/* thread-block dimensions used by the host launch in this file */
+#define BLKXSIZE 16
+#define BLKYSIZE 16
+/* integer division rounded up: how many blocks of size b are needed to cover a items */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/* NOTE(review): M_PI is not referenced by the code in this file */
+#define M_PI 3.14159265358979323846
+/* candidate patches whose weighted squared distance does not exceed EPS are skipped by the kernels */
+#define EPS 1.0e-8
+/* per-thread scratch sizes, one per supported SearchWindow w = 5, 7, 9, 11, 13: (2*w + 1)^2 candidates */
+#define CONSTVECSIZE5 121
+#define CONSTVECSIZE7 225
+#define CONSTVECSIZE9 361
+#define CONSTVECSIZE11 529
+#define CONSTVECSIZE13 729
+
+/* Device helper: exchange the two floats that xp and yp point to. */
+__device__ void swap(float *xp, float *yp)
+{
+    const float first = *xp;
+    const float second = *yp;
+    *xp = second;
+    *yp = first;
+}
+/* Device helper: exchange the two unsigned shorts that xp and yp point to. */
+__device__ void swapUS(unsigned short *xp, unsigned short *yp)
+{
+    const unsigned short first = *xp;
+    const unsigned short second = *yp;
+    *xp = second;
+    *yp = first;
+}
+
+/********************************************************************************/
+/* One thread per pixel (i, j): collect non-local weights inside the search window
+ * (scratch arrays sized for SearchWindow = 5), sort them from high to low and
+ * store the first NumNeighb of them.
+ *
+ * Ad          - image, read as Ad[i*M + j]
+ * H_i_d/H_j_d - output neighbour indices, NumNeighb planes of N*M entries each
+ * Weights_d   - output weights, same layout as H_i_d/H_j_d
+ * Eucl_Vec_d  - weights applied to the squared patch differences
+ * h2          - divisor of the patch distance inside the exponential
+ * SearchW_full is accepted but not used by the kernel.
+ */
+__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE5];
+    unsigned short ind_i[CONSTVECSIZE5];
+    unsigned short ind_j[CONSTVECSIZE5];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the host rounds the grid up to whole blocks; threads outside the image would
+       alias other pixels (j >= M) or write past the output arrays (i >= N) */
+    if ((i >= N) || (j >= M)) return;
+
+    long index = i*M+j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        /* only positions that lie inside the image for both patches contribute */
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M)) && ((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                            normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                            counterG++;
+                        }
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: emit a zero-weight entry instead of
+               copying uninitialised scratch memory */
+            H_i_d[index2] = 0;
+            H_j_d[index2] = 0;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/********************************************************************************/
+/* One thread per pixel (i, j): collect non-local weights inside the search window
+ * (scratch arrays sized for SearchWindow = 7), sort them from high to low and
+ * store the first NumNeighb of them.
+ *
+ * Ad          - image, read as Ad[i*M + j]
+ * H_i_d/H_j_d - output neighbour indices, NumNeighb planes of N*M entries each
+ * Weights_d   - output weights, same layout as H_i_d/H_j_d
+ * Eucl_Vec_d  - weights applied to the squared patch differences
+ * h2          - divisor of the patch distance inside the exponential
+ * SearchW_full is accepted but not used by the kernel.
+ */
+__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE7];
+    unsigned short ind_i[CONSTVECSIZE7];
+    unsigned short ind_j[CONSTVECSIZE7];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the host rounds the grid up to whole blocks; threads outside the image would
+       alias other pixels (j >= M) or write past the output arrays (i >= N) */
+    if ((i >= N) || (j >= M)) return;
+
+    long index = i*M+j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        /* only positions that lie inside the image for both patches contribute */
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M)) && ((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                            normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                            counterG++;
+                        }
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: emit a zero-weight entry instead of
+               copying uninitialised scratch memory */
+            H_i_d[index2] = 0;
+            H_j_d[index2] = 0;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/* One thread per pixel (i, j): collect non-local weights inside the search window
+ * (scratch arrays sized for SearchWindow = 9), sort them from high to low and
+ * store the first NumNeighb of them.
+ *
+ * Ad          - image, read as Ad[i*M + j]
+ * H_i_d/H_j_d - output neighbour indices, NumNeighb planes of N*M entries each
+ * Weights_d   - output weights, same layout as H_i_d/H_j_d
+ * Eucl_Vec_d  - weights applied to the squared patch differences
+ * h2          - divisor of the patch distance inside the exponential
+ * SearchW_full is accepted but not used by the kernel.
+ */
+__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE9];
+    unsigned short ind_i[CONSTVECSIZE9];
+    unsigned short ind_j[CONSTVECSIZE9];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the host rounds the grid up to whole blocks; threads outside the image would
+       alias other pixels (j >= M) or write past the output arrays (i >= N) */
+    if ((i >= N) || (j >= M)) return;
+
+    long index = i*M+j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        /* only positions that lie inside the image for both patches contribute */
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M)) && ((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                            normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                            counterG++;
+                        }
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    /* NOTE(review): the sibling kernels for windows 5, 7, 11 and 13 call __expf here - confirm which is intended */
+                    Weight_Vec[counter] = expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: emit a zero-weight entry instead of
+               copying uninitialised scratch memory */
+            H_i_d[index2] = 0;
+            H_j_d[index2] = 0;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/* One thread per pixel (i, j): collect non-local weights inside the search window
+ * (scratch arrays sized for SearchWindow = 11), sort them from high to low and
+ * store the first NumNeighb of them.
+ *
+ * Ad          - image, read as Ad[i*M + j]
+ * H_i_d/H_j_d - output neighbour indices, NumNeighb planes of N*M entries each
+ * Weights_d   - output weights, same layout as H_i_d/H_j_d
+ * Eucl_Vec_d  - weights applied to the squared patch differences
+ * h2          - divisor of the patch distance inside the exponential
+ * SearchW_full is accepted but not used by the kernel.
+ */
+__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE11];
+    unsigned short ind_i[CONSTVECSIZE11];
+    unsigned short ind_j[CONSTVECSIZE11];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the host rounds the grid up to whole blocks; threads outside the image would
+       alias other pixels (j >= M) or write past the output arrays (i >= N) */
+    if ((i >= N) || (j >= M)) return;
+
+    long index = i*M+j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        /* only positions that lie inside the image for both patches contribute */
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M)) && ((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                            normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                            counterG++;
+                        }
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: emit a zero-weight entry instead of
+               copying uninitialised scratch memory */
+            H_i_d[index2] = 0;
+            H_j_d[index2] = 0;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+/* One thread per pixel (i, j): collect non-local weights inside the search window
+ * (scratch arrays sized for SearchWindow = 13), sort them from high to low and
+ * store the first NumNeighb of them.
+ *
+ * Ad          - image, read as Ad[i*M + j]
+ * H_i_d/H_j_d - output neighbour indices, NumNeighb planes of N*M entries each
+ * Weights_d   - output weights, same layout as H_i_d/H_j_d
+ * Eucl_Vec_d  - weights applied to the squared patch differences
+ * h2          - divisor of the patch distance inside the exponential
+ * SearchW_full is accepted but not used by the kernel.
+ */
+__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+
+    long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2;
+    float normsum;
+
+    float Weight_Vec[CONSTVECSIZE13];
+    unsigned short ind_i[CONSTVECSIZE13];
+    unsigned short ind_j[CONSTVECSIZE13];
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* the host rounds the grid up to whole blocks; threads outside the image would
+       alias other pixels (j >= M) or write past the output arrays (i >= N) */
+    if ((i >= N) || (j >= M)) return;
+
+    long index = i*M+j;
+
+    counter = 0;
+    for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) {
+        for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) {
+            i1 = i+i_m;
+            j1 = j+j_m;
+            if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) {
+                normsum = 0.0f; counterG = 0;
+                for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) {
+                    for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) {
+                        i2 = i1 + i_c;
+                        j2 = j1 + j_c;
+                        i3 = i + i_c;
+                        j3 = j + j_c;
+                        /* only positions that lie inside the image for both patches contribute */
+                        if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M)) && ((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) {
+                            normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2);
+                            counterG++;
+                        }
+                    }}
+                /* writing temporarily into vectors */
+                if (normsum > EPS) {
+                    Weight_Vec[counter] = __expf(-normsum/h2);
+                    ind_i[counter] = i1;
+                    ind_j[counter] = j1;
+                    counter++;
+                }
+            }
+        }}
+
+    /* do sorting to choose the most prominent weights [HIGH to LOW] */
+    /* and re-arrange indices accordingly */
+    for (x = 0; x < counter-1; x++) {
+        for (y = 0; y < counter-x-1; y++) {
+            if (Weight_Vec[y] < Weight_Vec[y+1]) {
+                swap(&Weight_Vec[y], &Weight_Vec[y+1]);
+                swapUS(&ind_i[y], &ind_i[y+1]);
+                swapUS(&ind_j[y], &ind_j[y+1]);
+            }
+        }
+    }
+    /*sorting loop finished*/
+    /*now select the NumNeighb more prominent weights and store into arrays */
+    for(x=0; x < NumNeighb; x++) {
+        index2 = (N*M*x) + index;
+        if (x < counter) {
+            H_i_d[index2] = ind_i[x];
+            H_j_d[index2] = ind_j[x];
+            Weights_d[index2] = Weight_Vec[x];
+        }
+        else {
+            /* fewer candidates than requested: emit a zero-weight entry instead of
+               copying uninitialised scratch memory */
+            H_i_d[index2] = 0;
+            H_j_d[index2] = 0;
+            Weights_d[index2] = 0.0f;
+        }
+    }
+}
+
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/* Host entry point: pre-compute non-local weights and neighbour indices on the GPU.
+ *
+ * A            - host image of N*M floats
+ * H_i, H_j     - host outputs, N*M*NumNeighb neighbour indices each
+ * Weights      - host output, N*M*NumNeighb weights
+ * SearchWindow - half-size of the search window; must be 5, 7, 9, 11 or 13
+ * SimilarWin   - half-size of the similarity (patch) window
+ * NumNeighb    - number of neighbours stored per pixel
+ * h            - noise-related parameter; the kernels receive h*h
+ *
+ * Returns 0 on success and -1 when no CUDA device is found, SearchWindow is
+ * unsupported or the host allocation fails.
+ */
+extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+    int deviceCount = -1; // number of devices
+    cudaGetDeviceCount(&deviceCount);
+    if (deviceCount == 0) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+
+    /* reject unsupported window sizes before anything is allocated, so that this
+       error path cannot leak host or device memory */
+    if ((SearchWindow != 5) && (SearchWindow != 7) && (SearchWindow != 9) && (SearchWindow != 11) && (SearchWindow != 13)) {
+        fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+        return -1;
+    }
+
+    int SearchW_full, SimilW_full, counterG, i, j;
+    float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d;
+    unsigned short *H_i_d, *H_j_d;
+    h2 = h*h;
+
+    dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+    dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+
+    SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
+    SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+
+    /* generate a 2D Gaussian kernel for NLM procedure */
+    Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float));
+    if (Eucl_Vec == NULL) {
+        fprintf(stderr, "Failed to allocate the host similarity kernel\n");
+        return -1;
+    }
+    counterG = 0;
+    for(i=-SimilarWin; i<=SimilarWin; i++) {
+        for(j=-SimilarWin; j<=SimilarWin; j++) {
+            Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin));
+            counterG++;
+        }} /*main neighb loop */
+
+
+    /*allocate space on the device*/
+    checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) );
+    checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) );
+    checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) );
+    checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
+
+    /* copy data from the host to the device */
+    checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) );
+    checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+
+    /********************** Run CUDA kernel here ********************/
+    /* each kernel has scratch arrays sized for exactly one window size */
+    switch (SearchWindow) {
+        case 5:
+            IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+            break;
+        case 7:
+            IndexSelect2D_7_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+            break;
+        case 9:
+            IndexSelect2D_9_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+            break;
+        case 11:
+            IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+            break;
+        case 13:
+            IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2);
+            break;
+        default:
+            /* not reachable: SearchWindow was validated above */
+            break;
+    }
+    checkCudaErrors(cudaPeekAtLastError() );
+    checkCudaErrors(cudaDeviceSynchronize());
+    /***************************************************************/
+
+    checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+    checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) );
+    checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) );
+
+
+    cudaFree(Ad);
+    cudaFree(H_i_d);
+    cudaFree(H_j_d);
+    cudaFree(Weights_d);
+    cudaFree(Eucl_Vec_d);
+    /* the host-side Gaussian kernel was previously never released */
+    free(Eucl_Vec);
+    /* NOTE(review): cudaDeviceReset() resets the device state for the whole process - confirm callers hold no other CUDA resources */
+    cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.h b/src/Core/regularisers_GPU/PatchSelect_GPU_core.h
new file mode 100644
index 0000000..8c124d3
--- /dev/null
+++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __NLREG_KERNELS_H_
+#define __NLREG_KERNELS_H_
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* GPU pre-calculation of non-local weights: fills H_i, H_j and Weights (N*M*NumNeighb entries each) from the N*M image A.
+ * Returns 0 on success, -1 if no CUDA device is found or SearchWindow is not one of 5, 7, 9, 11, 13. */
+extern "C" CCPI_EXPORT int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+
+#endif
diff --git a/src/Core/regularisers_GPU/TGV_GPU_core.cu b/src/Core/regularisers_GPU/TGV_GPU_core.cu
new file mode 100644
index 0000000..e4abf72
--- /dev/null
+++ b/src/Core/regularisers_GPU/TGV_GPU_core.cu
@@ -0,0 +1,728 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2019 Daniil Kazantsev
+Copyright 2019 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TGV_GPU_core.h"
+#include "shared.h"
+
+/* CUDA implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+/* thread-block edge lengths for 3D launches */
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+/* thread-block edge lengths for 2D launches */
+#define BLKXSIZE2D 8
+#define BLKYSIZE2D 8
+#define EPS 1.0e-7
+/* integer division rounded up: how many blocks of size b are needed to cover a items */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+
+
+/********************************************************************/
+/***************************2D Functions*****************************/
+/********************************************************************/
+/* Dual update of P (2D): P += sigma*(forward difference of U - V), one thread per pixel.
+ * The forward difference is taken as zero on the last column/row (Neumann boundary). */
+__global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* test both coordinates: a thread with i >= dimX can still have
+       i + dimX*j < dimX*dimY and would then update pixel (i-dimX, j+1) a second time */
+    if ((i < dimX) && (j < dimY)) {
+        const int index = i + dimX*j;
+        /* symmetric boundary conditions (Neumann) */
+        if (i < dimX-1) P1[index] += sigma*((U[(i+1) + dimX*j] - U[index]) - V1[index]);
+        else P1[index] += sigma*(-V1[index]);
+        if (j < dimY-1) P2[index] += sigma*((U[i + dimX*(j+1)] - U[index]) - V2[index]);
+        else P2[index] += sigma*(-V2[index]);
+    }
+    return;
+}
+
+/* Projection of P (2D): where |(P1, P2)|/alpha1 exceeds 1, both components are divided by that ratio. */
+__global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1)
+{
+    float grad_magn;
+
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* test both coordinates: a thread with i >= dimX can still have
+       i + dimX*j < dimX*dimY and would then race with the thread owning pixel (i-dimX, j+1) */
+    if ((i < dimX) && (j < dimY)) {
+        const int index = i + dimX*j;
+        grad_magn = sqrtf(pow(P1[index],2) + pow(P2[index],2));
+        grad_magn = grad_magn/alpha1;
+        if (grad_magn > 1.0f) {
+            P1[index] /= grad_magn;
+            P2[index] /= grad_magn;
+        }
+    }
+    return;
+}
+
+/* Dual update of Q (2D): Q1 and Q2 accumulate sigma times the forward differences of V1 (along i)
+ * and V2 (along j); Q3 accumulates sigma times half the sum of the two cross differences.
+ * Differences across the last column/row are taken as zero (Neumann boundary). */
+__global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma)
+{
+    float q1, q2, q11, q22;
+
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* test both coordinates: a thread with i >= dimX can still have
+       i + dimX*j < dimX*dimY and would then update pixel (i-dimX, j+1) a second time */
+    if ((i < dimX) && (j < dimY)) {
+        const int index = i + dimX*j;
+        q1 = 0.0f; q2 = 0.0f; q11 = 0.0f; q22 = 0.0f;
+
+        if (i < dimX-1) {
+            /* boundary conditions (Neumann) */
+            q1 = V1[(i+1) + dimX*j] - V1[index];
+            q11 = V2[(i+1) + dimX*j] - V2[index];
+        }
+        if (j < dimY-1) {
+            q2 = V2[i + dimX*(j+1)] - V2[index];
+            q22 = V1[i + dimX*(j+1)] - V1[index];
+        }
+
+        Q1[index] += sigma*(q1);
+        Q2[index] += sigma*(q2);
+        Q3[index] += sigma*(0.5f*(q11 + q22));
+    }
+    return;
+}
+
+/* Projection of Q (2D): where sqrt(Q1^2 + Q2^2 + 2*Q3^2)/alpha0 exceeds 1,
+ * all three components are divided by that ratio. */
+__global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0)
+{
+    float grad_magn;
+
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* test both coordinates: a thread with i >= dimX can still have
+       i + dimX*j < dimX*dimY and would then race with the thread owning pixel (i-dimX, j+1) */
+    if ((i < dimX) && (j < dimY)) {
+        const int index = i + dimX*j;
+        grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2));
+        grad_magn = grad_magn/alpha0;
+        if (grad_magn > 1.0f) {
+            Q1[index] /= grad_magn;
+            Q2[index] /= grad_magn;
+            Q3[index] /= grad_magn;
+        }
+    }
+    return;
+}
+
+/* Primal update of U (2D): div = backward-difference divergence of (P1, P2), then
+ * U = (lambda*(U + tau*div) + tau*U0)/(lambda + tau). */
+__global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau)
+{
+    float P_v1, P_v2, div;
+
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* test both coordinates: a thread with i >= dimX can still have
+       i + dimX*j < dimX*dimY and would then update pixel (i-dimX, j+1) a second time */
+    if ((i < dimX) && (j < dimY)) {
+        const int index = i + dimX*j;
+
+        /* the lower boundary is tested first and the cases are exclusive, so a
+           dimension of size 1 never reads the element at i-1 (or j-1) = -1 */
+        if (i == 0) P_v1 = P1[index];
+        else if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j];
+        else P_v1 = P1[index] - P1[(i-1) + dimX*j];
+
+        if (j == 0) P_v2 = P2[index];
+        else if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)];
+        else P_v2 = P2[index] - P2[i + dimX*(j-1)];
+
+        div = P_v1 + P_v2;
+        U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+    }
+    return;
+}
+
+/* Update of V (2D): V1 += tau*(P1 + div1), V2 += tau*(P2 + div2), where div1/div2 are
+ * built from backward differences of Q1, Q2 and Q3 with boundary handling (Neumann). */
+__global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau)
+{
+    float q1, q3_x, q2, q3_y, div1, div2;
+
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* test both coordinates: a thread with i >= dimX can still have
+       i + dimX*j < dimX*dimY and would then update pixel (i-dimX, j+1) a second time */
+    if ((i < dimX) && (j < dimY)) {
+        const int index = i + dimX*j;
+        const int i1 = (i-1) + dimX*j;
+        const int j1 = (i) + dimX*(j-1);
+
+        /* boundary conditions (Neumann); the lower boundary is tested first in both
+           directions so a dimension of size 1 never reads a negative index */
+        if (i == 0) {
+            q1 = Q1[index];
+            q3_x = Q3[index]; }
+        else if (i == dimX-1) {
+            q1 = -Q1[i1];
+            q3_x = -Q3[i1]; }
+        else {
+            q1 = Q1[index] - Q1[i1];
+            q3_x = Q3[index] - Q3[i1]; }
+
+        if (j == 0) {
+            q2 = Q2[index];
+            q3_y = Q3[index]; }
+        else if (j == dimY-1) {
+            q2 = -Q2[j1];
+            q3_y = -Q3[j1]; }
+        else {
+            q2 = Q2[index] - Q2[j1];
+            q3_y = Q3[index] - Q3[j1]; }
+
+        div1 = q1 + q3_y;
+        div2 = q3_x + q2;
+        V1[index] += tau*(P1[index] + div1);
+        V2[index] += tau*(P2[index] + div2);
+    }
+    return;
+}
+
+/* Copy U into U_old element-wise; each thread handles the element at offset x + N*y.
+ * M is accepted but not used. */
+__global__ void copyIm_TGV_kernel(float *U, float *U_old, int N, int M, int num_total)
+{
+    const int tx = blockDim.x * blockIdx.x + threadIdx.x;
+    const int ty = blockDim.y * blockIdx.y + threadIdx.y;
+    const int offset = tx + N*ty;
+
+    /* threads mapped at or past num_total have nothing to copy */
+    if (offset >= num_total) return;
+
+    U_old[offset] = U[offset];
+}
+
+/* Copy V1 into V1_old and V2 into V2_old element-wise; each thread handles the element
+ * at offset x + N*y. M is accepted but not used. */
+__global__ void copyIm_TGV_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+{
+    const int tx = blockDim.x * blockIdx.x + threadIdx.x;
+    const int ty = blockDim.y * blockIdx.y + threadIdx.y;
+    const int offset = tx + N*ty;
+
+    /* threads mapped at or past num_total have nothing to copy */
+    if (offset >= num_total) return;
+
+    V1_old[offset] = V1[offset];
+    V2_old[offset] = V2[offset];
+}
+
+/* In-place extrapolation U = 2*U - U_old; each thread handles the element at offset x + N*y.
+ * M is accepted but not used. */
+__global__ void newU_kernel(float *U, float *U_old, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    int index = xIndex + N*yIndex;
+
+    /* xIndex < N keeps the (x, y) -> index mapping one-to-one; without it a thread with
+       xIndex >= N aliases element (xIndex-N, yIndex+1) and the in-place update runs twice */
+    if ((xIndex < N) && (index < num_total)) {
+        U[index] = 2.0f*U[index] - U_old[index];
+    }
+}
+
+
+/* In-place extrapolation V1 = 2*V1 - V1_old and V2 = 2*V2 - V2_old; each thread handles
+ * the element at offset x + N*y. M is accepted but not used. */
+__global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_old, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    int index = xIndex + N*yIndex;
+
+    /* xIndex < N keeps the (x, y) -> index mapping one-to-one; without it a thread with
+       xIndex >= N aliases element (xIndex-N, yIndex+1) and the in-place update runs twice */
+    if ((xIndex < N) && (index < num_total)) {
+        V1[index] = 2.0f*V1[index] - V1_old[index];
+        V2[index] = 2.0f*V2[index] - V2_old[index];
+    }
+}
+/********************************************************************/
+/***************************3D Functions*****************************/
+/********************************************************************/
+/* Dual update of P (3D): P += sigma*(forward difference of U - V) for the three components.
+ * The forward difference is taken as zero at the upper boundary of each direction (Neumann).
+ * NOTE(review): the in-slice offset here is i*dimX+j, while copyIm_TGV_kernel3D and
+ * newU_kernel3D in this file use j*dimX+i; the two conventions only cover the same
+ * elements when dimX == dimY - confirm the intended layout. */
+__global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma)
+{
+    int index;
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    int num_total = dimX*dimY*dimZ;
+
+    index = (dimX*dimY)*k + i*dimX+j;
+    /* per-coordinate tests stop threads outside the volume from aliasing
+       (and updating a second time) elements owned by other threads */
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        /* symmetric boundary conditions (Neumann) */
+        if (i < dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + (i+1)*dimX+j] - U[index]) - V1[index]);
+        else P1[index] += sigma*(-V1[index]);
+        if (j < dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + i*dimX+(j+1)] - U[index]) - V2[index]);
+        else P2[index] += sigma*(-V2[index]);
+        if (k < dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k+1) + i*dimX+(j)] - U[index]) - V3[index]);
+        else P3[index] += sigma*(-V3[index]);
+    }
+    return;
+}
+
+/* Projection of P (3D): where |(P1, P2, P3)|/alpha1 exceeds 1, all components are divided by that ratio.
+ * NOTE(review): the in-slice offset here is i*dimX+j, while copyIm_TGV_kernel3D and
+ * newU_kernel3D in this file use j*dimX+i - confirm the intended layout. */
+__global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float alpha1)
+{
+    float grad_magn;
+    int index;
+    int num_total = dimX*dimY*dimZ;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + i*dimX+j;
+    /* per-coordinate tests stop threads outside the volume from racing on
+       elements owned by other threads */
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1;
+        if (grad_magn > 1.0f) {
+            P1[index] /= grad_magn;
+            P2[index] /= grad_magn;
+            P3[index] /= grad_magn;
+        }
+    }
+    return;
+}
+
+/* Dual update of Q (3D): Q1..Q3 accumulate sigma times the forward differences of V1, V2, V3
+ * along their own direction; Q4..Q6 accumulate sigma times half the sum of the paired cross
+ * differences. Differences across the upper boundary are taken as zero (Neumann).
+ * NOTE(review): the in-slice offset here is i*dimX+j, while copyIm_TGV_kernel3D and
+ * newU_kernel3D in this file use j*dimX+i - confirm the intended layout. */
+__global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float sigma)
+{
+    int index;
+    float q1, q2, q3, q11, q22, q33, q44, q55, q66;
+
+    int num_total = dimX*dimY*dimZ;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + i*dimX+j;
+    int i1 = (dimX*dimY)*k + (i+1)*dimX+j;
+    int j1 = (dimX*dimY)*k + (i)*dimX+(j+1);
+    int k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j);
+
+    /* per-coordinate tests stop threads outside the volume from aliasing
+       (and updating a second time) elements owned by other threads */
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f;
+
+        /* boundary conditions (Neumann) */
+        if (i < dimX-1) {
+            q1 = V1[i1] - V1[index];
+            q11 = V2[i1] - V2[index];
+            q33 = V3[i1] - V3[index]; }
+        if (j < dimY-1) {
+            q2 = V2[j1] - V2[index];
+            q22 = V1[j1] - V1[index];
+            q55 = V3[j1] - V3[index]; }
+        if (k < dimZ-1) {
+            q3 = V3[k1] - V3[index];
+            q44 = V1[k1] - V1[index];
+            q66 = V2[k1] - V2[index]; }
+
+        Q1[index] += sigma*(q1); /*Q11*/
+        Q2[index] += sigma*(q2); /*Q22*/
+        Q3[index] += sigma*(q3); /*Q33*/
+        Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */
+        Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */
+        Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */
+    }
+    return;
+}
+
+/* Projection of Q (3D): where sqrt(Q1^2 + Q2^2 + Q3^2 + 2*Q4^2 + 2*Q5^2 + 2*Q6^2)/alpha0
+ * exceeds 1, all six components are divided by that ratio.
+ * NOTE(review): the in-slice offset here is i*dimX+j, while copyIm_TGV_kernel3D and
+ * newU_kernel3D in this file use j*dimX+i - confirm the intended layout. */
+__global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0)
+{
+    float grad_magn;
+    int index;
+    int num_total = dimX*dimY*dimZ;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + i*dimX+j;
+
+    /* per-coordinate tests stop threads outside the volume from racing on
+       elements owned by other threads */
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2));
+        grad_magn = grad_magn/alpha0;
+        if (grad_magn > 1.0f) {
+            Q1[index] /= grad_magn;
+            Q2[index] /= grad_magn;
+            Q3[index] /= grad_magn;
+            Q4[index] /= grad_magn;
+            Q5[index] /= grad_magn;
+            Q6[index] /= grad_magn;
+        }
+    }
+    return;
+}
+/* Primal update of U (3D): div = backward-difference divergence of (P1, P2, P3), then
+ * U = (lambda*(U + tau*div) + tau*U0)/(lambda + tau).
+ * NOTE(review): the in-slice offset here is i*dimX+j, while copyIm_TGV_kernel3D and
+ * newU_kernel3D in this file use j*dimX+i - confirm the intended layout. */
+__global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float lambda, float tau)
+{
+    float P_v1, P_v2, P_v3, div;
+    int index;
+    int num_total = dimX*dimY*dimZ;
+
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + i*dimX+j;
+    int i1 = (dimX*dimY)*k + (i-1)*dimX+j;
+    int j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
+    int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
+
+    /* per-coordinate tests stop threads outside the volume from aliasing
+       (and updating a second time) elements owned by other threads */
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+
+        /* the lower boundary is tested first and the cases are exclusive, so a
+           dimension of size 1 never reads the "previous" element at a negative offset */
+        if (i == 0) P_v1 = P1[index];
+        else if (i == dimX-1) P_v1 = -P1[i1];
+        else P_v1 = P1[index] - P1[i1];
+
+        if (j == 0) P_v2 = P2[index];
+        else if (j == dimY-1) P_v2 = -P2[j1];
+        else P_v2 = P2[index] - P2[j1];
+
+        if (k == 0) P_v3 = P3[index];
+        else if (k == dimZ-1) P_v3 = -P3[k1];
+        else P_v3 = P3[index] - P3[k1];
+
+
+        div = P_v1 + P_v2 + P_v3;
+        U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau);
+    }
+    return;
+}
+/* Update of V (3D): V1..V3 += tau*(P + div), where the three divergences are built from
+ * backward differences of Q1..Q6 with boundary handling (Neumann).
+ * NOTE(review): the in-slice offset here is i*dimX+j, while copyIm_TGV_kernel3D and
+ * newU_kernel3D in this file use j*dimX+i - confirm the intended layout. */
+__global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float tau)
+{
+    float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3;
+    int index;
+    int num_total = dimX*dimY*dimZ;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + i*dimX+j;
+    int i1 = (dimX*dimY)*k + (i-1)*dimX+j;
+    int j1 = (dimX*dimY)*k + (i)*dimX+(j-1);
+    int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j);
+
+    /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/
+    /* per-coordinate tests stop threads outside the volume from aliasing
+       (and updating a second time) elements owned by other threads */
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+
+        /* boundary conditions (Neumann); the lower boundary is tested first in every
+           direction so a dimension of size 1 never reads a negative offset */
+        if (i == 0) {
+            q1 = Q1[index];
+            q4x = Q4[index];
+            q5x = Q5[index]; }
+        else if (i == dimX-1) {
+            q1 = -Q1[i1];
+            q4x = -Q4[i1];
+            q5x = -Q5[i1]; }
+        else {
+            q1 = Q1[index] - Q1[i1];
+            q4x = Q4[index] - Q4[i1];
+            q5x = Q5[index] - Q5[i1]; }
+
+        if (j == 0) {
+            q2 = Q2[index];
+            q4y = Q4[index];
+            q6y = Q6[index]; }
+        else if (j == dimY-1) {
+            q2 = -Q2[j1];
+            q4y = -Q4[j1];
+            q6y = -Q6[j1]; }
+        else {
+            q2 = Q2[index] - Q2[j1];
+            q4y = Q4[index] - Q4[j1];
+            q6y = Q6[index] - Q6[j1]; }
+
+        if (k == 0) {
+            q6z = Q6[index];
+            q5z = Q5[index];
+            q3 = Q3[index]; }
+        else if (k == dimZ-1) {
+            q6z = -Q6[k1];
+            q5z = -Q5[k1];
+            q3 = -Q3[k1]; }
+        else {
+            q6z = Q6[index] - Q6[k1];
+            q5z = Q5[index] - Q5[k1];
+            q3 = Q3[index] - Q3[k1]; }
+
+        div1 = q1 + q4y + q5z;
+        div2 = q4x + q2 + q6z;
+        div3 = q5x + q6y + q3;
+
+        V1[index] += tau*(P1[index] + div1);
+        V2[index] += tau*(P2[index] + div2);
+        V3[index] += tau*(P3[index] + div3);
+    }
+    return;
+}
+
+/* Element-wise copy U_old = U over a dimX x dimY x dimZ volume (one thread per voxel).
+ * Threads are guarded by their (i,j,k) coordinates as well as by the linear index, so
+ * padding threads of the last blocks do not alias onto neighbouring rows and repeat work. */
+__global__ void copyIm_TGV_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    int index;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + j*dimX+i;
+
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        U_old[index] = U[index];
+    }
+}
+
+/* Element-wise copy of three volumes at once: V1_old = V1, V2_old = V2, V3_old = V3.
+ * Threads are guarded by their (i,j,k) coordinates as well as by the linear index, so
+ * padding threads of the last blocks do not alias onto neighbouring rows and repeat work. */
+__global__ void copyIm_TGV_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    int index;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + j*dimX+i;
+
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        V1_old[index] = V1[index];
+        V2_old[index] = V2[index];
+        V3_old[index] = V3[index];
+    }
+}
+
+/* Over-relaxation (extrapolation) step performed in place: U = 2*U - U_old.
+ *
+ * The update reads and rewrites U[index], so it must run exactly once per voxel.
+ * With only the "index < num_total" test, a padding thread with i >= dimX maps onto a
+ * valid voxel of a following row and could apply the update a second time; the
+ * coordinate guard below excludes those threads. */
+__global__ void newU_kernel3D(float *U, float *U_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    int index;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + j*dimX+i;
+
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        U[index] = 2.0f*U[index] - U_old[index];
+    }
+}
+
+/* Over-relaxation (extrapolation) step performed in place on three volumes:
+ * Vn = 2*Vn - Vn_old for n = 1,2,3.
+ *
+ * Each element must be updated exactly once, so threads are guarded by their (i,j,k)
+ * coordinates; the linear-index test alone lets padding threads alias onto valid
+ * voxels and repeat the in-place update. */
+__global__ void newU_kernel3D_ar3(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, int dimX, int dimY, int dimZ, int num_total)
+{
+    int index;
+
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    index = (dimX*dimY)*k + j*dimX+i;
+
+    if ((i < dimX) && (j < dimY) && (k < dimZ) && (index < num_total)) {
+        V1[index] = 2.0f*V1[index] - V1_old[index];
+        V2[index] = 2.0f*V2[index] - V2_old[index];
+        V3[index] = 2.0f*V3[index] - V3_old[index];
+    }
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/************************ MAIN HOST FUNCTION ***********************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/* Host driver of the GPU TGV regulariser.
+ *
+ * U0             - input image/volume on the host (dimX*dimY*dimZ floats)
+ * U              - output buffer on the host (same size), filled with the result
+ * lambda         - passed to the DivProjP kernels
+ * alpha1, alpha0 - passed to the P and Q projection kernels respectively
+ * iterationsNumb - number of primal-dual iterations
+ * L2             - constant from which both step sizes are derived: tau = sigma = L2^(-1/2)
+ * dimX,dimY,dimZ - extents; dimZ == 1 selects the 2D kernels, anything else the 3D ones
+ *
+ * Always runs on device 0. Returns 0; CUDA failures are handled by the CHECK macro.
+ */
+extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ)
+{
+    int dimTotal, dev = 0;
+    CHECK(cudaSetDevice(dev));
+
+    dimTotal = dimX*dimY*dimZ;
+
+    float *U_old, *d_U0, *d_U, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma;
+    tau = pow(L2,-0.5);
+    sigma = pow(L2,-0.5);
+
+    /* buffers shared by the 2D and 3D paths */
+    CHECK(cudaMalloc((void**)&d_U0,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&d_U,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&U_old,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&P1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&P2,dimTotal*sizeof(float)));
+
+    CHECK(cudaMalloc((void**)&Q1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&Q3,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V1,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V2,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V1_old,dimTotal*sizeof(float)));
+    CHECK(cudaMalloc((void**)&V2_old,dimTotal*sizeof(float)));
+
+    /* the iterate starts from the input; dual variables and V start from zero */
+    CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+    CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice));
+    CHECK(cudaMemset(P1, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(P2, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(Q1, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(Q2, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(Q3, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(V1, 0, dimTotal*sizeof(float)));
+    CHECK(cudaMemset(V2, 0, dimTotal*sizeof(float)));
+
+    if (dimZ == 1) {
+        /*2D case */
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        for(int n=0; n < iterationsNumb; n++) {
+
+            /* Calculate Dual Variable P */
+            DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for P*/
+            ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1);
+            CHECK(cudaDeviceSynchronize());
+            /* Calculate Dual Variable Q */
+            DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for Q*/
+            ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0);
+            CHECK(cudaDeviceSynchronize());
+            /*saving U into U_old*/
+            copyIm_TGV_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*adjoint operation -> divergence and projection of P*/
+            DivProjP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, dimX, dimY, lambda, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get updated solution U*/
+            newU_kernel<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*saving V into V_old*/
+            copyIm_TGV_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /* upd V*/
+            UpdV_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, P1, P2, Q1, Q2, Q3, dimX, dimY, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get new V*/
+            newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+        }
+    }
+    else {
+        /*3D case */
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        /* the z extent of the grid is rounded with the z block size (was BLKXSIZE) */
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /* extra components needed only in 3D */
+        float *P3, *Q4, *Q5, *Q6, *V3, *V3_old;
+
+        CHECK(cudaMalloc((void**)&P3,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&Q4,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&Q5,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&Q6,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&V3,dimTotal*sizeof(float)));
+        CHECK(cudaMalloc((void**)&V3_old,dimTotal*sizeof(float)));
+
+        /* cudaMemset takes an int byte value, so plain 0 is used */
+        CHECK(cudaMemset(Q4, 0, dimTotal*sizeof(float)));
+        CHECK(cudaMemset(Q5, 0, dimTotal*sizeof(float)));
+        CHECK(cudaMemset(Q6, 0, dimTotal*sizeof(float)));
+        CHECK(cudaMemset(P3, 0, dimTotal*sizeof(float)));
+        CHECK(cudaMemset(V3, 0, dimTotal*sizeof(float)));
+
+        for(int n=0; n < iterationsNumb; n++) {
+
+            /* Calculate Dual Variable P */
+            DualP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, V3, P1, P2, P3, dimX, dimY, dimZ, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for P*/
+            ProjP_3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, alpha1);
+            CHECK(cudaDeviceSynchronize());
+            /* Calculate Dual Variable Q */
+            DualQ_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, sigma);
+            CHECK(cudaDeviceSynchronize());
+            /*Projection onto convex set for Q*/
+            ProjQ_3D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, alpha0);
+            CHECK(cudaDeviceSynchronize());
+            /*saving U into U_old*/
+            copyIm_TGV_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*adjoint operation -> divergence and projection of P*/
+            DivProjP_3D_kernel<<<dimGrid,dimBlock>>>(d_U, d_U0, P1, P2, P3, dimX, dimY, dimZ, lambda, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get updated solution U*/
+            newU_kernel3D<<<dimGrid,dimBlock>>>(d_U, U_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /*saving V into V_old*/
+            copyIm_TGV_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+            /* upd V*/
+            UpdV_3D_kernel<<<dimGrid,dimBlock>>>(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, dimX, dimY, dimZ, tau);
+            CHECK(cudaDeviceSynchronize());
+            /*get new V*/
+            newU_kernel3D_ar3<<<dimGrid,dimBlock>>>(V1, V2, V3, V1_old, V2_old, V3_old, dimX, dimY, dimZ, dimTotal);
+            CHECK(cudaDeviceSynchronize());
+        }
+
+        CHECK(cudaFree(Q4));
+        CHECK(cudaFree(Q5));
+        CHECK(cudaFree(Q6));
+        CHECK(cudaFree(P3));
+        CHECK(cudaFree(V3));
+        CHECK(cudaFree(V3_old));
+    }
+
+    /* copy the result back to the host and release the shared buffers */
+    CHECK(cudaMemcpy(U,d_U,dimTotal*sizeof(float),cudaMemcpyDeviceToHost));
+    CHECK(cudaFree(d_U0));
+    CHECK(cudaFree(d_U));
+    CHECK(cudaFree(U_old));
+    CHECK(cudaFree(P1));
+    CHECK(cudaFree(P2));
+
+    CHECK(cudaFree(Q1));
+    CHECK(cudaFree(Q2));
+    CHECK(cudaFree(Q3));
+    CHECK(cudaFree(V1));
+    CHECK(cudaFree(V2));
+    CHECK(cudaFree(V1_old));
+    CHECK(cudaFree(V2_old));
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TGV_GPU_core.h b/src/Core/regularisers_GPU/TGV_GPU_core.h
new file mode 100644
index 0000000..9f73d1c
--- /dev/null
+++ b/src/Core/regularisers_GPU/TGV_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __TGV_GPU_H__
+#define __TGV_GPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* GPU TGV regulariser (host entry point, C linkage).
+ * U0 is the input and U the output, both host buffers of dimX*dimY*dimZ floats;
+ * dimZ == 1 selects the 2D implementation, otherwise the 3D one is used.
+ * The implementation returns 0. */
+extern "C" CCPI_EXPORT int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
+
+#endif
diff --git a/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
new file mode 100755
index 0000000..b371c5d
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -0,0 +1,564 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TV_FGP_GPU_core.h"
+#include "shared.h"
+#include <thrust/device_ptr.h>
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity constraint (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/* Unary functor returning x*x; handed to thrust::transform_reduce to build a sum of squares. */
+struct square
+{
+    __host__ __device__ float operator()(float x)
+    {
+        return x * x;
+    }
+};
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+/* FGP-TV 2D: D = Ad - lambda*div(R), with the divergence of (R1,R2) built from backward
+ * differences; the backward neighbour is taken as zero on the first column / first row.
+ * N is the row length (x extent), M the number of rows (y extent). ImSize is not used. */
+__global__ void Obj_func2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads outside the N x M image have nothing to do */
+    if ((col >= N) || (row >= M)) return;
+
+    const int pos = col + N*row;
+
+    /* backward neighbours, zero where there is none */
+    const float prevR1 = (col <= 0) ? 0.0f : R1[pos - 1];
+    const float prevR2 = (row <= 0) ? 0.0f : R2[pos - N];
+
+    D[pos] = Ad[pos] - lambda*(R1[pos] + R2[pos] - prevR1 - prevR2);
+}
+
+/* FGP-TV 2D: P = R + multip*grad(D), where grad(D) is D[here] - D[next] along x and y
+ * and is set to zero on the last column / last row. ImSize is not used. */
+__global__ void Grad_func2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, int N, int M, int ImSize, float multip)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int pos = col + N*row;
+
+    /* differences towards the next pixel; zero at the far boundary */
+    const float diffX = (col >= N-1) ? 0.0f : D[pos] - D[pos + 1];
+    const float diffY = (row >= M-1) ? 0.0f : D[pos] - D[pos + N];
+
+    P1[pos] = R1[pos] + multip*diffX;
+    P2[pos] = R2[pos] + multip*diffY;
+}
+
+/* Isotropic projection: wherever P1^2 + P2^2 exceeds 1, the pair (P1,P2) is divided by
+ * its Euclidean norm; other pixels are left untouched. ImSize is not used. */
+__global__ void Proj_func2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int pos = col + N*row;
+
+    const float sqNorm = pow(P1[pos],2) + pow(P2[pos],2);
+    if (sqNorm > 1.0f) {
+        /* same divisor for both components */
+        const float norm = sqrt(sqNorm);
+        P1[pos] = P1[pos]/norm;
+        P2[pos] = P2[pos]/norm;
+    }
+}
+/* Anisotropic projection: each component is divided by max(|component|, 1) independently,
+ * which clamps it to [-1, 1] and leaves smaller values unchanged. ImSize is not used. */
+__global__ void Proj_func2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int pos = col + N*row;
+
+    float scale1 = abs(P1[pos]);
+    float scale2 = abs(P2[pos]);
+    /* never divide by less than one */
+    scale1 = (scale1 < 1.0f) ? 1.0f : scale1;
+    scale2 = (scale2 < 1.0f) ? 1.0f : scale2;
+
+    P1[pos] = P1[pos]/scale1;
+    P2[pos] = P2[pos]/scale2;
+}
+/* FGP momentum step: R = P + multip2*(P - P_old) for both components.
+ * tkp1, tk and ImSize are accepted but not used inside the kernel. */
+__global__ void Rupd_func2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int pos = col + N*row;
+
+    const float p1 = P1[pos];
+    const float p2 = P2[pos];
+
+    R1[pos] = p1 + multip2*(p1 - P1_old[pos]);
+    R2[pos] = p2 + multip2*(p2 - P2_old[pos]);
+}
+/* Non-negativity constraint: negative entries of Output are set to zero.
+ * Guarded by (xIndex, yIndex) like the other 2D kernels of this file, so padding threads
+ * with xIndex >= N no longer alias onto the next row and repeat work; the linear-index
+ * bound against num_total is kept as well. */
+__global__ void nonneg2D_kernel(float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    int index = xIndex + N*yIndex;
+
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+/* FGP-TV 3D: D = Ad - lambda*div(R), with the divergence of (R1,R2,R3) built from backward
+ * differences; the backward neighbour is taken as zero on the first plane of each axis.
+ * N, M, Z are the x, y and z extents. ImSize is not used. */
+__global__ void Obj_func3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the volume have nothing to do */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int slice = N*M;
+    const int pos = slice*k + i + N*j;
+
+    /* backward neighbours, zero where there is none */
+    const float prevR1 = (i <= 0) ? 0.0f : R1[pos - 1];
+    const float prevR2 = (j <= 0) ? 0.0f : R2[pos - N];
+    const float prevR3 = (k <= 0) ? 0.0f : R3[pos - slice];
+
+    D[pos] = Ad[pos] - lambda*(R1[pos] + R2[pos] + R3[pos] - prevR1 - prevR2 - prevR3);
+}
+
+/* FGP-TV 3D: P = R + multip*grad(D), where grad(D) is D[here] - D[next] along x, y and z
+ * and is set to zero on the last plane of each axis. ImSize is not used. */
+__global__ void Grad_func3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float multip)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int slice = N*M;
+    const int pos = slice*k + i + N*j;
+
+    /* differences towards the next voxel; zero at the far boundary */
+    const float diffX = (i >= N-1) ? 0.0f : D[pos] - D[pos + 1];
+    const float diffY = (j >= M-1) ? 0.0f : D[pos] - D[pos + N];
+    const float diffZ = (k >= Z-1) ? 0.0f : D[pos] - D[pos + slice];
+
+    P1[pos] = R1[pos] + multip*diffX;
+    P2[pos] = R2[pos] + multip*diffY;
+    P3[pos] = R3[pos] + multip*diffZ;
+}
+
+/* Isotropic projection in 3D: wherever P1^2 + P2^2 + P3^2 exceeds 1, the triple is
+ * multiplied by the reciprocal of its Euclidean norm. ImSize is not used. */
+__global__ void Proj_func3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int pos = (N*M)*k + i + N*j;
+
+    const float sqNorm = pow(P1[pos],2) + pow(P2[pos],2) + pow(P3[pos],2);
+    if (sqNorm <= 1.0f) return;   /* already inside the unit ball */
+
+    const float invNorm = 1.0f/sqrt(sqNorm);
+    P1[pos] = P1[pos]*invNorm;
+    P2[pos] = P2[pos]*invNorm;
+    P3[pos] = P3[pos]*invNorm;
+}
+
+/* Anisotropic projection in 3D: each component is divided by max(|component|, 1)
+ * independently. ImSize is not used. */
+__global__ void Proj_func3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int pos = (N*M)*k + i + N*j;
+
+    float scale1 = abs(P1[pos]);
+    float scale2 = abs(P2[pos]);
+    float scale3 = abs(P3[pos]);
+    /* never divide by less than one */
+    scale1 = (scale1 < 1.0f) ? 1.0f : scale1;
+    scale2 = (scale2 < 1.0f) ? 1.0f : scale2;
+    scale3 = (scale3 < 1.0f) ? 1.0f : scale3;
+
+    P1[pos] = P1[pos]/scale1;
+    P2[pos] = P2[pos]/scale2;
+    P3[pos] = P3[pos]/scale3;
+}
+/* FGP momentum step in 3D: R = P + multip2*(P - P_old) for all three components.
+ * tkp1, tk and ImSize are accepted but not used inside the kernel. */
+__global__ void Rupd_func3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int pos = (N*M)*k + i + N*j;
+
+    const float p1 = P1[pos];
+    const float p2 = P2[pos];
+    const float p3 = P3[pos];
+
+    R1[pos] = p1 + multip2*(p1 - P1_old[pos]);
+    R2[pos] = p2 + multip2*(p2 - P2_old[pos]);
+    R3[pos] = p3 + multip2*(p3 - P3_old[pos]);
+}
+
+/* Non-negativity constraint in 3D: negative entries of Output are set to zero.
+ * Guarded by (i,j,k) like the other 3D kernels of this file, so padding threads no longer
+ * alias onto neighbouring rows/slices; the linear-index bound against num_total is kept. */
+__global__ void nonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    int index = (N*M)*k + i + N*j;
+
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        if (Output[index] < 0.0f) Output[index] = 0.0f;
+    }
+}
+/* Element-wise copy Output = Input over an N x M image.
+ * Guarded by (xIndex, yIndex) like the other 2D kernels of this file, so padding threads
+ * with xIndex >= N no longer alias onto the next row and copy the same element again;
+ * the linear-index bound against num_total is kept as well. */
+__global__ void FGPcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    int index = xIndex + N*yIndex;
+
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+
+/* Element-wise copy Output = Input over an N x M x Z volume.
+ * Guarded by (i,j,k) like the other 3D kernels of this file, so padding threads no longer
+ * alias onto neighbouring rows/slices; the linear-index bound against num_total is kept. */
+__global__ void FGPcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    int index = (N*M)*k + i + N*j;
+
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        Output[index] = Input[index];
+    }
+}
+
+/* Element-wise residual Output = Input1 - Input2 over an N x M image.
+ * Guarded by (xIndex, yIndex) like the other 2D kernels of this file, so padding threads
+ * with xIndex >= N no longer alias onto the next row; the linear-index bound against
+ * num_total is kept as well. */
+__global__ void FGPResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    int index = xIndex + N*yIndex;
+
+    if ((xIndex < N) && (yIndex < M) && (index < num_total)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/* Element-wise residual Output = Input1 - Input2 over an N x M x Z volume.
+ * Guarded by (i,j,k) like the other 3D kernels of this file, so padding threads no longer
+ * alias onto neighbouring rows/slices; the linear-index bound against num_total is kept. */
+__global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    int index = (N*M)*k + i + N*j;
+
+    if ((i < N) && (j < M) && (k < Z) && (index < num_total)) {
+        Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+
+////////////MAIN HOST FUNCTION ///////////////
+/* Host driver of the GPU FGP-TV regulariser.
+ *
+ * Input          - noisy image/volume on the host (dimX*dimY*dimZ floats)
+ * Output         - host buffer of the same size receiving the result
+ * lambdaPar      - regularisation parameter
+ * iter           - maximum number of iterations
+ * epsil          - tolerance of the stopping rule; 0 disables it. Only the 2D path
+ *                  evaluates it, the 3D path always runs `iter` iterations.
+ * methodTV       - 0 selects the isotropic projection, anything else the anisotropic one
+ * nonneg         - non-zero enables the non-negativity constraint
+ * printM         - 1 prints the iteration at which the loop stopped
+ * dimX,dimY,dimZ - extents; dimZ <= 1 selects the 2D kernels
+ *
+ * Returns 0 on success and -1 when no usable CUDA device is reported.
+ */
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = -1; // number of devices
+    /* a failing query (e.g. no driver) leaves deviceCount untouched, so test the status too */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+
+    int count = 0, i;
+    float re, multip,multip2;
+    float tk = 1.0f;
+    float tkp1=1.0f;
+
+    if (dimZ <= 1) {
+        /*2D verson*/
+        int ImSize = dimX*dimY;
+        float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+            /* the first residual is taken against this buffer, so it must not be uninitialised */
+            checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+
+        /********************** Run CUDA 2D kernel here ********************/
+        multip = (1.0f/(8.0f*lambdaPar));
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /* computing the gradient of the objective function */
+            Obj_func2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                nonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() ); }
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, dimX, dimY, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_func2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+            else Proj_func2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                /* P1_prev is free at this point (it is refreshed from P1 below) and serves as scratch */
+                FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                /* wrap the raw device pointers so Thrust reduces them in place on the device,
+                   instead of treating them as host iterators and copying into a temporary vector */
+                thrust::device_ptr<float> d_resid = thrust::device_pointer_cast(P1_prev);
+                thrust::device_ptr<float> d_upd = thrust::device_pointer_cast(d_update);
+                float reduction = sqrt(thrust::transform_reduce(d_resid, d_resid + ImSize, square(), 0.0f, thrust::plus<float>()));
+                float reduction2 = sqrt(thrust::transform_reduce(d_upd, d_upd + ImSize, square(), 0.0f, thrust::plus<float>()));
+
+                /* relative change of the iterate; stop after it stayed below epsil more than 4 times */
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        checkCudaErrors( cudaFree(d_input) );
+        checkCudaErrors( cudaFree(d_update) );
+        if (epsil != 0.0f) checkCudaErrors( cudaFree(d_update_prev) );
+        checkCudaErrors( cudaFree(P1) );
+        checkCudaErrors( cudaFree(P2) );
+        checkCudaErrors( cudaFree(P1_prev) );
+        checkCudaErrors( cudaFree(P2_prev) );
+        checkCudaErrors( cudaFree(R1) );
+        checkCudaErrors( cudaFree(R2) );
+    }
+    else {
+        /*3D verson*/
+        int ImSize = dimX*dimY*dimZ;
+        float *d_input, *d_update=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL;
+
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(P1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P1_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P2_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(P3_prev, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R1, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R2, 0, ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMemset(R3, 0, ImSize*sizeof(float)) );
+        /********************** Run CUDA 3D kernel here ********************/
+        multip = (1.0f/(26.0f*lambdaPar));
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /* computing the gradient of the objective function */
+            Obj_func3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                nonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() ); }
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_func3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+            else Proj_func3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_func3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            FGPcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        checkCudaErrors( cudaFree(d_input) );
+        checkCudaErrors( cudaFree(d_update) );
+        checkCudaErrors( cudaFree(P1) );
+        checkCudaErrors( cudaFree(P2) );
+        checkCudaErrors( cudaFree(P3) );
+        checkCudaErrors( cudaFree(P1_prev) );
+        checkCudaErrors( cudaFree(P2_prev) );
+        checkCudaErrors( cudaFree(P3_prev) );
+        checkCudaErrors( cudaFree(R1) );
+        checkCudaErrors( cudaFree(R2) );
+        checkCudaErrors( cudaFree(R3) );
+    }
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TV_FGP_GPU_core.h b/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
new file mode 100755
index 0000000..bf13508
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
@@ -0,0 +1,9 @@
+#ifndef _TV_FGP_GPU_
+#define _TV_FGP_GPU_
+
+#include "CCPiDefines.h"
+#include <memory.h>
+
+/* GPU FGP-TV regulariser (host entry point, C linkage).
+ * Input and Output are host buffers of dimX*dimY*dimZ floats; dimZ <= 1 selects the 2D
+ * implementation. The implementation returns 0 on success and -1 when no CUDA device is found. */
+extern "C" CCPI_EXPORT int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+#endif
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
new file mode 100755
index 0000000..76f5be9
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -0,0 +1,358 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TV_ROF_GPU_core.h"
+
+/* CUDA implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
+*
+* Input Parameters:
+* 1. Noisy image/volume [REQUIRED]
+* 2. lambda - regularization parameter [REQUIRED]
+* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
+* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+*
+* Output:
+* [1] Regularized image/volume
+
+ * This function is based on the paper by
+* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*
+* D. Kazantsev, 2016-18
+*/
+#include "shared.h"
+
+/* thread-block dimensions used for the 3D kernel launches */
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+/* thread-block dimensions used for the 2D kernel launches */
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+/* small constant added under the square root of the difference kernels so
+ * that their denominators never become zero (double-precision literal) */
+#define EPS 1.0e-12
+
+/* integer division of a by b rounded up; used to size the launch grids */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+
+/* NOTE: both macros evaluate their arguments twice */
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+/* Signum of x: 1 for positive input, -1 for negative input and 0 otherwise
+ * (zero, or NaN since both comparisons fail). */
+__host__ __device__ int sign (float x)
+{
+    if (x > 0) return 1;
+    if (x < 0) return -1;
+    return 0;
+}
+
+/*********************2D case****************************/
+
+ /* differences 1 (2D)
+  * For every pixel of the N x M image (index = i + N*j) writes to D1 the forward
+  * difference along j divided by sqrt(fwd_j^2 + minmod_i^2 + EPS), where
+  * minmod_i combines the forward and backward differences along i.
+  * Input - image to differentiate, D1 - output array of the same size. */
+ __global__ void D1_func2D(float* Input, float* D1, int N, int M)
+ {
+     const int col = blockDim.x * blockIdx.x + threadIdx.x;
+     const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+     /* threads that fall outside the image have nothing to do */
+     if ((col < 0) || (col >= N) || (row < 0) || (row >= M)) return;
+
+     const int centre = col + N*row;
+
+     /* neighbour coordinates, reflected at the image borders (Neumann) */
+     const int colNext = (col + 1 >= N) ? col - 1 : col + 1;
+     const int colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+     const int rowNext = (row + 1 >= M) ? row - 1 : row + 1;
+
+     /* forward-backward differences */
+     const float fwdRow = Input[rowNext*N + col] - Input[centre];
+     const float fwdCol = Input[row*N + colNext] - Input[centre];
+     const float bwdCol = Input[centre] - Input[row*N + colPrev];
+
+     /* squared forward difference and squared minmod-limited cross difference */
+     const float fwdRowSq = fwdRow*fwdRow;
+     float limited = 0.5f*(sign(fwdCol) + sign(bwdCol))*(MIN(abs(fwdCol), abs(bwdCol)));
+     limited = limited*limited;
+
+     /* EPS keeps the denominator strictly positive */
+     const float magnitude = sqrt(fwdRowSq + limited + EPS);
+     D1[centre] = fwdRow/magnitude;
+ }
+
+ /* differences 2 (2D)
+  * For every pixel of the N x M image (index = i + N*j) writes to D2 the forward
+  * difference along i divided by sqrt(fwd_i^2 + minmod_j^2 + EPS), where
+  * minmod_j combines the forward and backward differences along j.
+  * Input - image to differentiate, D2 - output array of the same size. */
+ __global__ void D2_func2D(float* Input, float* D2, int N, int M)
+ {
+     const int col = blockDim.x * blockIdx.x + threadIdx.x;
+     const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+     /* threads that fall outside the image have nothing to do */
+     if ((col < 0) || (col >= N) || (row < 0) || (row >= M)) return;
+
+     const int centre = col + N*row;
+
+     /* neighbour coordinates, reflected at the image borders (Neumann) */
+     const int colNext = (col + 1 >= N) ? col - 1 : col + 1;
+     const int rowNext = (row + 1 >= M) ? row - 1 : row + 1;
+     const int rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+     /* forward-backward differences */
+     const float fwdRow = Input[rowNext*N + col] - Input[centre];
+     const float fwdCol = Input[row*N + colNext] - Input[centre];
+     const float bwdRow = Input[centre] - Input[rowPrev*N + col];
+
+     /* squared forward difference and squared minmod-limited cross difference */
+     const float fwdColSq = fwdCol*fwdCol;
+     float limited = 0.5f*(sign(fwdRow) + sign(bwdRow))*(MIN(abs(fwdRow), abs(bwdRow)));
+     limited = limited*limited;
+
+     /* EPS keeps the denominator strictly positive */
+     const float magnitude = sqrt(fwdColSq + limited + EPS);
+     D2[centre] = fwdCol/magnitude;
+ }
+
+ /* Explicit update step of the 2D scheme.
+  * Adds to every pixel of Update the step
+  *   tau*(2*lambda*div(D1,D2) - (Update - Input)),
+  * where the divergence is built from backward differences of D1 (along j)
+  * and D2 (along i). All arrays hold N x M floats with index = i + N*j. */
+ __global__ void TV_kernel2D(float *D1, float *D2, float *Update, float *Input, float lambda, float tau, int N, int M)
+ {
+     const int col = blockDim.x * blockIdx.x + threadIdx.x;
+     const int row = blockDim.y * blockIdx.y + threadIdx.y;
+
+     /* threads that fall outside the image have nothing to do */
+     if ((col < 0) || (col >= N) || (row < 0) || (row >= M)) return;
+
+     const int centre = col + N*row;
+
+     /* previous neighbours, reflected at the image borders (Neumann) */
+     const int colPrev = (col - 1 < 0) ? col + 1 : col - 1;
+     const int rowPrev = (row - 1 < 0) ? row + 1 : row - 1;
+
+     /* divergence components */
+     const float divRow = D1[centre] - D1[rowPrev*N + col];
+     const float divCol = D2[centre] - D2[row*N + colPrev];
+
+     Update[centre] += tau*(2.0f*lambda*(divRow + divCol) - (Update[centre] - Input[centre]));
+ }
+/*********************3D case****************************/
+
+ /* differences 1 (3D)
+  * For every voxel of the dimX x dimY x dimZ volume
+  * (index = (dimX*dimY)*k + j*dimX + i) writes to D1 the forward difference
+  * along j divided by sqrt(fwd_j^2 + minmod_i^2 + minmod_k^2 + EPS).
+  * Input - volume to differentiate, D1 - output array of the same size. */
+ __global__ void D1_func3D(float* Input, float* D1, int dimX, int dimY, int dimZ)
+ {
+     float NOMx_1, NOMy_1, NOMy_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T1;
+     int i1, i2, j1, k1, k2;
+
+     int i = blockDim.x * blockIdx.x + threadIdx.x;
+     int j = blockDim.y * blockIdx.y + threadIdx.y;
+     int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+     int index = (dimX*dimY)*k + j*dimX+i;
+
+     if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+         /* symmetric boundary conditions (Neumann); the j-1 neighbour is not
+          * needed by this kernel, so it is no longer computed */
+         i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+         i2 = i - 1; if (i2 < 0) i2 = i+1;
+         j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+         k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+         k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+         /* Forward-backward differences */
+         NOMx_1 = Input[(dimX*dimY)*k + j1*dimX + i] - Input[index]; /* x+ */
+         NOMy_1 = Input[(dimX*dimY)*k + j*dimX + i1] - Input[index]; /* y+ */
+         NOMy_0 = Input[index] - Input[(dimX*dimY)*k + j*dimX + i2]; /* y- */
+
+         NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+         NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + j*dimX + i]; /* z- */
+
+         /* minmod-limited cross differences; 0.5f (as in the 2D kernels)
+          * avoids promoting the product to double precision */
+         denom1 = NOMx_1*NOMx_1;
+         denom2 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+         denom2 = denom2*denom2;
+         denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+         denom3 = denom3*denom3;
+         /* EPS keeps the denominator strictly positive */
+         T1 = sqrt(denom1 + denom2 + denom3 + EPS);
+         D1[index] = NOMx_1/T1;
+     }
+ }
+
+ /* differences 2 (3D)
+  * For every voxel of the dimX x dimY x dimZ volume
+  * (index = (dimX*dimY)*k + j*dimX + i) writes to D2 the forward difference
+  * along i divided by sqrt(fwd_i^2 + minmod_j^2 + minmod_k^2 + EPS).
+  * Input - volume to differentiate, D2 - output array of the same size. */
+ __global__ void D2_func3D(float* Input, float* D2, int dimX, int dimY, int dimZ)
+ {
+     float NOMx_1, NOMy_1, NOMx_0, NOMz_1, NOMz_0, denom1, denom2, denom3, T2;
+     int i1, j1, j2, k1, k2;
+
+     int i = blockDim.x * blockIdx.x + threadIdx.x;
+     int j = blockDim.y * blockIdx.y + threadIdx.y;
+     int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+     int index = (dimX*dimY)*k + j*dimX+i;
+
+     if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+         /* symmetric boundary conditions (Neumann); the i-1 neighbour is not
+          * needed by this kernel, so it is no longer computed */
+         i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+         j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+         j2 = j - 1; if (j2 < 0) j2 = j+1;
+         k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+         k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+         /* Forward-backward differences */
+         NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+         NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+         NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+         NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+         NOMz_0 = Input[index] - Input[(dimX*dimY)*k2 + (j)*dimX + i]; /* z- */
+
+         /* minmod-limited cross differences; 0.5f (as in the 2D kernels)
+          * avoids promoting the product to double precision */
+         denom1 = NOMy_1*NOMy_1;
+         denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+         denom2 = denom2*denom2;
+         denom3 = 0.5f*(sign(NOMz_1) + sign(NOMz_0))*(MIN(abs(NOMz_1),abs(NOMz_0)));
+         denom3 = denom3*denom3;
+         /* EPS keeps the denominator strictly positive */
+         T2 = sqrt(denom1 + denom2 + denom3 + EPS);
+         D2[index] = NOMy_1/T2;
+     }
+ }
+
+ /* differences 3 (3D)
+  * For every voxel of the dimX x dimY x dimZ volume
+  * (index = (dimX*dimY)*k + j*dimX + i) writes to D3 the forward difference
+  * along k divided by sqrt(fwd_k^2 + minmod_j^2 + minmod_i^2 + EPS).
+  * Input - volume to differentiate, D3 - output array of the same size. */
+ __global__ void D3_func3D(float* Input, float* D3, int dimX, int dimY, int dimZ)
+ {
+     float NOMx_1, NOMy_1, NOMx_0, NOMy_0, NOMz_1, denom1, denom2, denom3, T3;
+     int i1, i2, j1, j2, k1;
+
+     int i = blockDim.x * blockIdx.x + threadIdx.x;
+     int j = blockDim.y * blockIdx.y + threadIdx.y;
+     int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+     int index = (dimX*dimY)*k + j*dimX+i;
+
+     if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+         /* symmetric boundary conditions (Neumann); the k-1 neighbour is not
+          * needed by this kernel, so it is no longer computed */
+         i1 = i + 1; if (i1 >= dimX) i1 = i-1;
+         i2 = i - 1; if (i2 < 0) i2 = i+1;
+         j1 = j + 1; if (j1 >= dimY) j1 = j-1;
+         j2 = j - 1; if (j2 < 0) j2 = j+1;
+         k1 = k + 1; if (k1 >= dimZ) k1 = k-1;
+
+         /* Forward-backward differences */
+         NOMx_1 = Input[(dimX*dimY)*k + (j1)*dimX + i] - Input[index]; /* x+ */
+         NOMy_1 = Input[(dimX*dimY)*k + (j)*dimX + i1] - Input[index]; /* y+ */
+         NOMy_0 = Input[index] - Input[(dimX*dimY)*k + (j)*dimX + i2]; /* y- */
+         NOMx_0 = Input[index] - Input[(dimX*dimY)*k + (j2)*dimX + i]; /* x- */
+         NOMz_1 = Input[(dimX*dimY)*k1 + j*dimX + i] - Input[index]; /* z+ */
+
+         /* minmod-limited cross differences; 0.5f (as in the 2D kernels)
+          * avoids promoting the product to double precision */
+         denom1 = NOMz_1*NOMz_1;
+         denom2 = 0.5f*(sign(NOMx_1) + sign(NOMx_0))*(MIN(abs(NOMx_1),abs(NOMx_0)));
+         denom2 = denom2*denom2;
+         denom3 = 0.5f*(sign(NOMy_1) + sign(NOMy_0))*(MIN(abs(NOMy_1),abs(NOMy_0)));
+         denom3 = denom3*denom3;
+         /* EPS keeps the denominator strictly positive */
+         T3 = sqrt(denom1 + denom2 + denom3 + EPS);
+         D3[index] = NOMz_1/T3;
+     }
+ }
+
+ /* Explicit update step of the 3D scheme.
+  * Adds to every voxel of Update the step
+  *   tau*(2*lambda*div(D1,D2,D3) - (Update - Input)),
+  * where the divergence is built from backward differences of D1 (along j),
+  * D2 (along i) and D3 (along k). All arrays hold dimX*dimY*dimZ floats with
+  * index = (dimX*dimY)*k + j*dimX + i. */
+ __global__ void TV_kernel3D(float *D1, float *D2, float *D3, float *Update, float *Input, float lambda, float tau, int dimX, int dimY, int dimZ)
+ {
+     float dv1, dv2, dv3;
+     int i2, j2, k2;
+     int i = blockDim.x * blockIdx.x + threadIdx.x;
+     int j = blockDim.y * blockIdx.y + threadIdx.y;
+     int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+     int index = (dimX*dimY)*k + j*dimX+i;
+
+     if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) {
+
+         /* symmetric boundary conditions (Neumann); only the previous
+          * neighbours are used by the backward differences below */
+         i2 = i - 1; if (i2 < 0) i2 = i+1;
+         j2 = j - 1; if (j2 < 0) j2 = j+1;
+         k2 = k - 1; if (k2 < 0) k2 = k+1;
+
+         /* divergence components */
+         dv1 = D1[index] - D1[(dimX*dimY)*k + j2*dimX+i];
+         dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
+         dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
+
+         Update[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
+
+     }
+ }
+
+/////////////////////////////////////////////////
+// HOST FUNCTION
+/* Host driver of the ROF-TV regulariser.
+ * Input     - host array with N*M*Z floats (copied to the device)
+ * Output    - host array with N*M*Z floats receiving the result
+ * lambdaPar - regularisation parameter forwarded to the update kernels
+ * iter      - number of explicit iterations
+ * tau       - step size forwarded to the update kernels
+ * N, M, Z   - dimensions, N being the fastest-varying one; Z == 0 is treated
+ *             as 1 and Z > 1 selects the 3D kernels
+ * Returns 0. Every CUDA call is wrapped in CHECK (not defined in this file). */
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
+{
+    // set up device
+    int dev = 0;
+    CHECK(cudaSetDevice(dev));
+    float *d_input, *d_update, *d_D1, *d_D2;
+
+    if (Z == 0) Z = 1;
+
+    /* widen before multiplying so that the byte count of large volumes
+     * does not overflow int arithmetic */
+    const size_t volBytes = (size_t)N*(size_t)M*(size_t)Z*sizeof(float);
+
+    CHECK(cudaMalloc((void**)&d_input,volBytes));
+    CHECK(cudaMalloc((void**)&d_update,volBytes));
+    CHECK(cudaMalloc((void**)&d_D1,volBytes));
+    CHECK(cudaMalloc((void**)&d_D2,volBytes));
+
+    /* the iterate starts from the noisy input */
+    CHECK(cudaMemcpy(d_input,Input,volBytes,cudaMemcpyHostToDevice));
+    CHECK(cudaMemcpy(d_update,Input,volBytes,cudaMemcpyHostToDevice));
+
+    if (Z > 1) {
+        // TV - 3D case
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        /* the z extent of the grid must be derived from the z block size */
+        dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKZSIZE));
+
+        float *d_D3;
+        CHECK(cudaMalloc((void**)&d_D3,volBytes));
+
+        for(int n=0; n < iter; n++) {
+            /* calculate differences */
+            D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
+            CHECK(cudaDeviceSynchronize());
+            D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
+            CHECK(cudaDeviceSynchronize());
+            D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
+            CHECK(cudaDeviceSynchronize());
+            /*running main kernel*/
+            TV_kernel3D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_D3, d_update, d_input, lambdaPar, tau, N, M, Z);
+            CHECK(cudaDeviceSynchronize());
+        }
+
+        CHECK(cudaFree(d_D3));
+    }
+    else {
+        // TV - 2D case
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+
+        for(int n=0; n < iter; n++) {
+            /* calculate differences */
+            D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
+            CHECK(cudaDeviceSynchronize());
+            D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
+            CHECK(cudaDeviceSynchronize());
+            /*running main kernel*/
+            TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
+            CHECK(cudaDeviceSynchronize());
+        }
+    }
+    CHECK(cudaMemcpy(Output,d_update,volBytes,cudaMemcpyDeviceToHost));
+    CHECK(cudaFree(d_input));
+    CHECK(cudaFree(d_update));
+    CHECK(cudaFree(d_D1));
+    CHECK(cudaFree(d_D2));
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
new file mode 100755
index 0000000..3a09296
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -0,0 +1,8 @@
+#ifndef __TVGPU_H__
+#define __TVGPU_H__
+#include "CCPiDefines.h"
+#include <stdio.h>
+
+/* Host entry point of the ROF-TV GPU regulariser, exported with C linkage.
+ * Input/Output are host arrays of N*M*Z floats, lambdaPar is the regularisation
+ * parameter, iter the number of iterations and tau the step size of the
+ * explicit scheme. Z == 0 is treated as 1; Z > 1 selects the 3D kernels.
+ * Returns 0. */
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+
+#endif
diff --git a/src/Core/regularisers_GPU/TV_SB_GPU_core.cu b/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
new file mode 100755
index 0000000..1f494ee
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -0,0 +1,552 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "TV_SB_GPU_core.h"
+#include "shared.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. nonneg: 'nonnegativity (0 is OFF by default) [OPTIONAL parameter]
+* 7. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+
+/* thread-block dimensions used for the 2D kernel launches */
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+/* thread-block dimensions used for the 3D kernel launches */
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+/* integer division of a by b rounded up; used to size the launch grids */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/* unary functor returning x*x; handed to thrust::transform_reduce to build the
+ * sums of squares of the stopping criterion */
+struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+/* One relaxation sweep of the 2D solver.
+ * For every pixel of the N x M image (index = j*N + i) combines backward
+ * differences of Dx, Dy, Bx, By with the four neighbours of U_prev, scales the
+ * sum by lambda, adds mu*A and writes normConst times the result to U.
+ * Neighbours are reflected at the borders. ImSize is not used. */
+__global__ void gauss_seidel2D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Bx, float *By, float lambda, float mu, float normConst, int N, int M, int ImSize)
+{
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+
+    /* threads outside the image do nothing */
+    if ((i >= N) || (j >= M)) return;
+
+    /* neighbour coordinates with symmetric (Neumann) boundary handling */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int iPrev = (i-1 < 0) ? i+1 : i-1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+    const int jPrev = (j-1 < 0) ? j+1 : j-1;
+
+    /* linear offsets of the pixel and of its four neighbours */
+    const int centre = j*N+i;
+    const int atINext = j*N+iNext;
+    const int atIPrev = j*N+iPrev;
+    const int atJNext = jNext*N+i;
+    const int atJPrev = jPrev*N+i;
+
+    float acc = Dx[atIPrev] - Dx[centre] + Dy[atJPrev] - Dy[centre] - Bx[atIPrev] + Bx[centre] - By[atJPrev] + By[centre];
+    acc += U_prev[atINext] + U_prev[atIPrev] + U_prev[atJNext] + U_prev[atJPrev];
+    acc *= lambda;
+    acc += mu*A[centre];
+    U[centre] = normConst*acc; //Write final result to global memory
+}
+/* Anisotropic shrinkage of the auxiliary variables (2D).
+ * For every pixel forms forward difference of U plus the Bregman variable in
+ * each direction and soft-thresholds the two values independently with the
+ * threshold 1/lambda; results go to Dx and Dy. ImSize is not used. */
+__global__ void updDxDy_shrinkAniso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
+{
+    const float thresh = 1.0f/lambda;
+
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+
+    /* threads outside the image do nothing */
+    if ((i >= N) || (j >= M)) return;
+
+    const int centre = j*N+i;
+
+    /* next neighbours, reflected at the borders */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+
+    const float gx = (U[j*N+iNext] - U[centre]) + Bx[centre];
+    const float gy = (U[jNext*N+i] - U[centre]) + By[centre];
+
+    /* magnitudes reduced by the threshold and clipped at zero */
+    float magX = abs(gx) - thresh;
+    if (magX < 0) magX = 0;
+    float magY = abs(gy) - thresh;
+    if (magY < 0) magY = 0;
+
+    /* restore the sign; a zero input stays zero */
+    if (gx != 0) {
+        Dx[centre] = (gx/abs(gx))*magX;
+    }
+    else {
+        Dx[centre] = 0;
+    }
+    if (gy != 0) {
+        Dy[centre] = (gy/abs(gy))*magY;
+    }
+    else {
+        Dy[centre] = 0;
+    }
+}
+
+/* Isotropic shrinkage of the auxiliary variables (2D).
+ * For every pixel forms forward difference of U plus the Bregman variable in
+ * each direction, then shrinks the resulting 2-vector as a whole: its length is
+ * reduced by 1/lambda (clipped at zero) while its direction is kept.
+ * Results go to Dx and Dy. ImSize is not used. */
+__global__ void updDxDy_shrinkIso2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, float lambda, int N, int M, int ImSize)
+{
+    const float thresh = 1.0f/lambda;
+
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+
+    /* threads outside the image do nothing */
+    if ((i >= N) || (j >= M)) return;
+
+    const int centre = j*N+i;
+
+    /* next neighbours, reflected at the borders */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+
+    const float gx = (U[j*N+iNext] - U[centre]) + Bx[centre];
+    const float gy = (U[jNext*N+i] - U[centre]) + By[centre];
+
+    const float length = sqrt(gx*gx + gy*gy);
+
+    float shrunk = (length - thresh);
+    if (shrunk < 0) shrunk = 0.0f;
+
+    /* a zero-length vector has no direction: write zeros */
+    if (length == 0.0f) {
+        Dx[centre] = 0;
+        Dy[centre] = 0;
+        return;
+    }
+    Dx[centre] = shrunk*(gx/length);
+    Dy[centre] = shrunk*(gy/length);
+}
+
+/* Update of the Bregman variables (2D).
+ * For every pixel adds to Bx / By the forward difference of U in the
+ * corresponding direction minus Dx / Dy. ImSize is not used. */
+__global__ void updBxBy2D_kernel(float *U, float *Dx, float *Dy, float *Bx, float *By, int N, int M, int ImSize)
+{
+    //calculate each thread global index
+    const int i=blockIdx.x*blockDim.x+threadIdx.x;
+    const int j=blockIdx.y*blockDim.y+threadIdx.y;
+
+    /* threads outside the image do nothing */
+    if ((i >= N) || (j >= M)) return;
+
+    const int centre = j*N+i;
+
+    /* symmetric boundary conditions (Neumann) */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+
+    Bx[centre] += (U[j*N+iNext] - U[centre]) - Dx[centre];
+    By[centre] += (U[jNext*N+i] - U[centre]) - Dy[centre];
+}
+
+
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+/* One relaxation sweep of the 3D solver.
+ * For every voxel of the N x M x Z volume (index = (N*M)*k + i + N*j) combines
+ * backward differences of Dx, Dy, Dz, Bx, By, Bz with the six neighbours of
+ * U_prev, scales the sum by lambda, adds mu*A and writes normConst times the
+ * result to U. Neighbours are reflected at the borders. ImSize is not used. */
+__global__ void gauss_seidel3D_kernel(float *U, float *A, float *U_prev, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, float mu, float normConst, int N, int M, int Z, int ImSize)
+{
+    //calculate each thread global index
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    /* neighbour coordinates with symmetric (Neumann) boundary handling */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int iPrev = (i-1 < 0) ? i+1 : i-1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+    const int jPrev = (j-1 < 0) ? j+1 : j-1;
+    const int kNext = (k+1 == Z) ? k-1 : k+1;
+    const int kPrev = (k-1 < 0) ? k+1 : k-1;
+
+    /* linear offsets of the voxel and of its six neighbours */
+    const int slice = (N*M)*k;
+    const int centre = slice + i + N*j;
+    const int atINext = slice + j*N+iNext;
+    const int atIPrev = slice + j*N+iPrev;
+    const int atJNext = slice + jNext*N+i;
+    const int atJPrev = slice + jPrev*N+i;
+    const int atKNext = (N*M)*kNext + j*N+i;
+    const int atKPrev = (N*M)*kPrev + j*N+i;
+
+    const float dTerm = Dx[atIPrev] - Dx[centre] + Dy[atJPrev] - Dy[centre] + Dz[atKPrev] - Dz[centre];
+    const float bTerm = -Bx[atIPrev] + Bx[centre] - By[atJPrev] + By[centre] - Bz[atKPrev] + Bz[centre];
+    float acc = dTerm + bTerm;
+    acc += U_prev[atINext] + U_prev[atIPrev] + U_prev[atJNext] + U_prev[atJPrev] + U_prev[atKNext] + U_prev[atKPrev];
+    acc *= lambda;
+    acc += mu*A[centre];
+    U[centre] = normConst*acc;
+}
+/* Anisotropic shrinkage of the auxiliary variables (3D).
+ * For every voxel forms forward difference of U plus the Bregman variable in
+ * each of the three directions and soft-thresholds the values independently
+ * with the threshold 1/lambda; results go to Dx, Dy and Dz.
+ * ImSize is not used. */
+__global__ void updDxDy_shrinkAniso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
+{
+    const float thresh = 1.0f/lambda;
+
+    //calculate each thread global index
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int centre = (N*M)*k + i + N*j;
+
+    /* next neighbours, reflected at the borders */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+    const int kNext = (k+1 == Z) ? k-1 : k+1;
+
+    const float gx = (U[(N*M)*k + iNext + N*j] - U[centre]) + Bx[centre];
+    const float gy = (U[(N*M)*k + i + N*jNext] - U[centre]) + By[centre];
+    const float gz = (U[(N*M)*kNext + i + N*j] - U[centre]) + Bz[centre];
+
+    /* magnitudes reduced by the threshold and clipped at zero */
+    float magX = abs(gx) - thresh;
+    if (magX < 0.0f) magX = 0.0f;
+    float magY = abs(gy) - thresh;
+    if (magY < 0.0f) magY = 0.0f;
+    float magZ = abs(gz) - thresh;
+    if (magZ < 0.0f) magZ = 0.0f;
+
+    /* restore the sign; a zero input stays zero */
+    if (gx != 0.0f) {
+        Dx[centre] = (gx/abs(gx))*magX;
+    }
+    else {
+        Dx[centre] = 0.0f;
+    }
+    if (gy != 0.0f) {
+        Dy[centre] = (gy/abs(gy))*magY;
+    }
+    else {
+        Dy[centre] = 0.0f;
+    }
+    if (gz != 0.0f) {
+        Dz[centre] = (gz/abs(gz))*magZ;
+    }
+    else {
+        Dz[centre] = 0.0f;
+    }
+}
+
+/* Isotropic shrinkage of the auxiliary variables (3D).
+ * For every voxel forms forward difference of U plus the Bregman variable in
+ * each direction, then shrinks the resulting 3-vector as a whole: its length is
+ * reduced by 1/lambda (clipped at zero) while its direction is kept.
+ * Results go to Dx, Dy and Dz. ImSize is not used. */
+__global__ void updDxDy_shrinkIso3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, float lambda, int N, int M, int Z, int ImSize)
+{
+    const float thresh = 1.0f/lambda;
+
+    //calculate each thread global index
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int centre = (N*M)*k + i + N*j;
+
+    /* next neighbours, reflected at the borders */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+    const int kNext = (k+1 == Z) ? k-1 : k+1;
+
+    const float gx = (U[(N*M)*k + iNext + N*j] - U[centre]) + Bx[centre];
+    const float gy = (U[(N*M)*k + i + N*jNext] - U[centre]) + By[centre];
+    const float gz = (U[(N*M)*kNext + i + N*j] - U[centre]) + Bz[centre];
+
+    const float length = sqrt(gx*gx + gy*gy + gz*gz);
+
+    float shrunk = (length - thresh);
+    if (shrunk < 0.0f) shrunk = 0.0f;
+
+    /* a zero-length vector has no direction: write zeros */
+    if (length == 0.0f) {
+        Dx[centre] = 0.0f;
+        Dy[centre] = 0.0f;
+        Dz[centre] = 0.0f;
+        return;
+    }
+    Dx[centre] = shrunk*(gx/length);
+    Dy[centre] = shrunk*(gy/length);
+    Dz[centre] = shrunk*(gz/length);
+}
+
+/* Update of the Bregman variables (3D).
+ * For every voxel adds to Bx / By / Bz the forward difference of U in the
+ * corresponding direction minus Dx / Dy / Dz. ImSize is not used. */
+__global__ void updBxBy3D_kernel(float *U, float *Dx, float *Dy, float *Dz, float *Bx, float *By, float *Bz, int N, int M, int Z, int ImSize)
+{
+    //calculate each thread global index
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the volume do nothing */
+    if ((i >= N) || (j >= M) || (k >= Z)) return;
+
+    const int centre = (N*M)*k + i + N*j;
+
+    /* symmetric boundary conditions (Neumann) */
+    const int iNext = (i+1 == N) ? i-1 : i+1;
+    const int jNext = (j+1 == M) ? j-1 : j+1;
+    const int kNext = (k+1 == Z) ? k-1 : k+1;
+
+    Bx[centre] += (U[(N*M)*k + iNext + N*j] - U[centre]) - Dx[centre];
+    By[centre] += (U[(N*M)*k + i + N*jNext] - U[centre]) - Dy[centre];
+    Bz[centre] += (U[(N*M)*kNext + i + N*j] - U[centre]) - Dz[centre];
+}
+
+/* Element-wise copy of an N x M image: Output[index] = Input[index] with
+ * index = x + N*y. num_total is the number of elements that may be written. */
+__global__ void SBcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* Guard each coordinate separately: a padding thread with xIndex >= N
+     * would otherwise alias an element of a later row and redundantly
+     * rewrite data that belongs to another thread. */
+    if ((xIndex < N) && (yIndex < M)) {
+        int index = xIndex + N*yIndex;
+
+        if (index < num_total) {
+            Output[index] = Input[index];
+        }
+    }
+}
+
+/* Element-wise copy of an N x M x Z volume: Output[index] = Input[index] with
+ * index = (N*M)*k + i + N*j. num_total is the number of elements that may be
+ * written. */
+__global__ void SBcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* Guard each coordinate separately: a padding thread with i >= N or
+     * j >= M would otherwise alias an element owned by another thread and
+     * redundantly rewrite it. */
+    if ((i < N) && (j < M) && (k < Z)) {
+        int index = (N*M)*k + i + N*j;
+
+        if (index < num_total) {
+            Output[index] = Input[index];
+        }
+    }
+}
+
+/* Element-wise difference of two N x M images:
+ * Output[index] = Input1[index] - Input2[index] with index = x + N*y.
+ * num_total is the number of elements that may be written. */
+__global__ void SBResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    /* Guard each coordinate separately: a padding thread with xIndex >= N
+     * would otherwise alias an element of a later row and redundantly
+     * recompute data that belongs to another thread. */
+    if ((xIndex < N) && (yIndex < M)) {
+        int index = xIndex + N*yIndex;
+
+        if (index < num_total) {
+            Output[index] = Input1[index] - Input2[index];
+        }
+    }
+}
+
+/* Element-wise difference of two N x M x Z volumes:
+ * Output[index] = Input1[index] - Input2[index] with
+ * index = (N*M)*k + i + N*j. num_total is the number of elements that may be
+ * written. */
+__global__ void SBResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+    int j = blockDim.y * blockIdx.y + threadIdx.y;
+    int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* Guard each coordinate separately: a padding thread with i >= N or
+     * j >= M would otherwise alias an element owned by another thread and
+     * redundantly recompute it. */
+    if ((i < N) && (j < M) && (k < Z)) {
+        int index = (N*M)*k + i + N*j;
+
+        if (index < num_total) {
+            Output[index] = Input1[index] - Input2[index];
+        }
+    }
+}
+
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/********************* MAIN HOST FUNCTION ******************/
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+/* Host driver of the Split Bregman TV regulariser (2D/3D).
+ * Input    - host array with dimX*dimY*dimZ floats (dimX*dimY in the 2D case)
+ * Output   - host array of the same size receiving the result
+ * mu       - regularisation parameter; it is inverted internally and lambda
+ *            is set to twice the inverted value
+ * iter     - maximum number of outer iterations
+ * epsil    - tolerance of the stopping rule; 0 disables the test. Iterations
+ *            stop once the relative change fell below epsil more than 4 times
+ * methodTV - 1 selects the anisotropic shrinkage kernel, any other value the
+ *            isotropic one
+ * printM   - 1 prints the iteration at which the loop stopped
+ * dimX, dimY, dimZ - dimensions; dimZ <= 1 selects the 2D kernels
+ * Returns 0 on success and -1 when no usable CUDA device is found. */
+extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = -1; // number of devices
+    /* test the status as well: a failed query may leave deviceCount at -1,
+     * which the former "== 0" comparison let through */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+
+    int ll, DimTotal;
+    float re, lambda, normConst;
+    int count = 0;
+    mu = 1.0f/mu;
+    lambda = 2.0f*mu;
+
+    if (dimZ <= 1) {
+        /*2D version*/
+        DimTotal = dimX*dimY;
+        normConst = 1.0f/(mu + 4.0f*lambda);
+        float *d_input=NULL, *d_update=NULL, *d_res=NULL, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Bx=NULL, *By=NULL;
+
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
+        /* the residual buffer is only needed by the stopping rule */
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(Dx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Dy, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Bx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(By, 0, DimTotal*sizeof(float)) );
+
+        /********************** Run CUDA 2D kernels here ********************/
+        /* The main kernel */
+        for (ll = 0; ll < iter; ll++) {
+
+            /* storing old value */
+            SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            SBcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            /* 2nd GS iteration */
+            gauss_seidel2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Bx, By, lambda, mu, normConst, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* TV-related step */
+            if (methodTV == 1) updDxDy_shrinkAniso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
+            else updDxDy_shrinkIso2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, lambda, dimX, dimY, DimTotal);
+
+            /* update for Bregman variables */
+            updBxBy2D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Bx, By, dimX, dimY, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                SBResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, DimTotal);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                /* NOTE(review): both device_vector constructors copy their
+                 * source buffer on every iteration -- confirm whether
+                 * reducing over the raw buffers directly is acceptable */
+                thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+                /* relative change ||u - u_prev|| / ||u|| */
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+            }
+
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        cudaFree(d_update_prev);
+        if (epsil != 0.0f) cudaFree(d_res);
+        cudaFree(Dx);
+        cudaFree(Dy);
+        cudaFree(Bx);
+        cudaFree(By);
+    }
+    else {
+        /*3D version*/
+        DimTotal = dimX*dimY*dimZ;
+        normConst = 1.0f/(mu + 6.0f*lambda);
+        float *d_input=NULL, *d_update=NULL, *d_res=NULL, *d_update_prev=NULL, *Dx=NULL, *Dy=NULL, *Dz=NULL, *Bx=NULL, *By=NULL, *Bz=NULL;
+
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update_prev,DimTotal*sizeof(float)) );
+        /* the residual buffer is only needed by the stopping rule */
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_res,DimTotal*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&Dx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dy,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Dz,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Bx,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&By,DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&Bz,DimTotal*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_update,Input,DimTotal*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemset(Dx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Dy, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Dz, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Bx, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(By, 0, DimTotal*sizeof(float)) );
+        checkCudaErrors( cudaMemset(Bz, 0, DimTotal*sizeof(float)) );
+
+        /********************** Run CUDA 3D kernels here ********************/
+        /* The main kernel */
+        for (ll = 0; ll < iter; ll++) {
+
+            /* storing old value */
+            SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* perform two GS iterations (normally 2 is enough for the convergence) */
+            gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            SBcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+            /* 2nd GS iteration */
+            gauss_seidel3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_input, d_update_prev, Dx, Dy, Dz, Bx, By, Bz, lambda, mu, normConst, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* TV-related step */
+            if (methodTV == 1) updDxDy_shrinkAniso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
+            else updDxDy_shrinkIso3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, lambda, dimX, dimY, dimZ, DimTotal);
+
+            /* update for Bregman variables */
+            updBxBy3D_kernel<<<dimGrid,dimBlock>>>(d_update, Dx, Dy, Dz, Bx, By, Bz, dimX, dimY, dimZ, DimTotal);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                SBResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_res, dimX, dimY, dimZ, DimTotal);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                /* NOTE(review): both device_vector constructors copy their
+                 * source buffer on every iteration -- confirm whether
+                 * reducing over the raw buffers directly is acceptable */
+                thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+                /* relative change ||u - u_prev|| / ||u|| */
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+            }
+        }
+        if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        checkCudaErrors( cudaMemcpy(Output,d_update,DimTotal*sizeof(float),cudaMemcpyDeviceToHost) );
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        cudaFree(d_update_prev);
+        if (epsil != 0.0f) cudaFree(d_res);
+        cudaFree(Dx);
+        cudaFree(Dy);
+        cudaFree(Dz);
+        cudaFree(Bx);
+        cudaFree(By);
+        cudaFree(Bz);
+    }
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/TV_SB_GPU_core.h b/src/Core/regularisers_GPU/TV_SB_GPU_core.h
new file mode 100755
index 0000000..901b90f
--- /dev/null
+++ b/src/Core/regularisers_GPU/TV_SB_GPU_core.h
@@ -0,0 +1,10 @@
+#ifndef _SB_TV_GPU_
+#define _SB_TV_GPU_
+
+#include "CCPiDefines.h"
+#include <memory.h>
+
+
+/* Prototype of the host entry point of the GPU SB-TV routine, exported with C linkage.
+ * NOTE(review): the implementation is only partly visible next to this header; from
+ * that part the result is copied into Output, printM == 1 prints the final iteration
+ * number, a non-zero epsil enables the residual-based early stop and the function
+ * returns 0 at its end. Confirm the remaining parameters (Input, mu, iter, methodTV,
+ * dimX/dimY/dimZ) against the .cu file. */
+extern "C" CCPI_EXPORT int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+
+#endif
diff --git a/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
new file mode 100644
index 0000000..7503ec7
--- /dev/null
+++ b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -0,0 +1,741 @@
+ /*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "shared.h"
+#include "dTV_FGP_GPU_core.h"
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
+
+/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 (the reference) is kept fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab code and the papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+
+
+/* thread-block dimensions used when launching the 2D kernels */
+#define BLKXSIZE2D 16
+#define BLKYSIZE2D 16
+
+/* thread-block dimensions used when launching the 3D kernels */
+#define BLKXSIZE 8
+#define BLKYSIZE 8
+#define BLKZSIZE 8
+
+/* integer division a/b rounded up when there is a remainder (used to size the launch grid) */
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+/* unary functor returning x*x; handed to thrust::transform_reduce to build sums of squares */
+struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+
+/************************************************/
+/*****************2D modules*********************/
+/************************************************/
+
+/* Normalised forward-difference gradient of the 2D reference image.
+ *
+ * Refd           - reference image, addressed as x + N*y
+ * Refd_x, Refd_y - outputs: gradient components divided by sqrt(gradX^2 + gradY^2 + eta^2)
+ * eta            - smoothing constant entering the square root
+ * N, M           - image extent along x and y
+ * ImSize         - not used by this kernel
+ *
+ * The neighbour beyond the last column/row is taken as zero.
+ */
+__global__ void GradNorm_func2D_kernel(float *Refd, float *Refd_x, float *Refd_y, float eta, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    /* threads outside the N x M extent have nothing to do */
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    /* forward neighbours, zero past the last column/row */
+    const float right = (col >= N-1) ? 0.0f : Refd[(col+1) + N*row];
+    const float below = (row >= M-1) ? 0.0f : Refd[(col) + N*(row + 1)];
+
+    const float gradX = right - Refd[index];
+    const float gradY = below - Refd[index];
+
+    float magn = pow(gradX,2) + pow(gradY,2);
+    magn = sqrt(magn + pow(eta,2));
+
+    Refd_x[index] = gradX/magn;
+    Refd_y[index] = gradY/magn;
+}
+
+/* Removes from the 2D vector field (R1,R2) its component along (Refd_x,Refd_y), pixel by pixel:
+ * R <- R - <R,Refd> * Refd. The update is done in place; ImSize is not used.
+ */
+__global__ void ProjectVect_func2D_kernel(float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    /* inner product of R with the reference direction at this pixel */
+    const float in_prod = R1[index]*Refd_x[index] + R2[index]*Refd_y[index];
+
+    R1[index] = R1[index] - in_prod*Refd_x[index];
+    R2[index] = R2[index] - in_prod*Refd_y[index];
+}
+
+
+/* Writes D = Ad - lambda*(R1 + R2 - R1[x-1] - R2[y-1]) for every pixel of an N x M image.
+ * The backward neighbours are taken as zero in the first column/row. ImSize is not used.
+ */
+__global__ void Obj_dfunc2D_kernel(float *Ad, float *D, float *R1, float *R2, int N, int M, int ImSize, float lambda)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    /* backward neighbours, zero before the first column/row */
+    const float left  = (col <= 0) ? 0.0f : R1[(col-1) + N*row];
+    const float above = (row <= 0) ? 0.0f : R2[col + N*(row-1)];
+
+    D[index] = Ad[index] - lambda*(R1[index] + R2[index] - left - above);
+}
+
+/* Computes P = R + multip * proj(d), where d = (D[x,y]-D[x+1,y], D[x,y]-D[x,y+1]) and proj
+ * removes the component of d along (Refd_x,Refd_y). The differences are set to zero in the
+ * last column/row. ImSize is not used.
+ */
+__global__ void Grad_dfunc2D_kernel(float *P1, float *P2, float *D, float *R1, float *R2, float *Refd_x, float *Refd_y, int N, int M, int ImSize, float multip)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    /* differences towards the next column/row, zero on the far boundary */
+    float diffX = (col >= N-1) ? 0.0f : D[index] - D[(col+1) + N*row];
+    float diffY = (row >= M-1) ? 0.0f : D[index] - D[(col) + N*(row + 1)];
+
+    /* strip the part of the difference vector that is parallel to the reference field */
+    const float in_prod = diffX*Refd_x[index] + diffY*Refd_y[index];
+    diffX = diffX - in_prod*Refd_x[index];
+    diffY = diffY - in_prod*Refd_y[index];
+
+    P1[index] = R1[index] + multip*diffX;
+    P2[index] = R2[index] + multip*diffY;
+}
+
+/* Isotropic projection: where P1^2 + P2^2 exceeds 1, both components are divided by the
+ * square root of that sum; other pixels are left untouched. ImSize is not used.
+ */
+__global__ void Proj_dfunc2D_iso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+    const float denom = pow(P1[index],2) + pow(P2[index],2);
+
+    if (denom > 1.0f) {
+        P1[index] = P1[index]/sqrt(denom);
+        P2[index] = P2[index]/sqrt(denom);
+    }
+}
+/* Anisotropic projection: each component is divided by max(|component|, 1) independently,
+ * which leaves values of magnitude below 1 unchanged. ImSize is not used.
+ */
+__global__ void Proj_dfunc2D_aniso_kernel(float *P1, float *P2, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    float scale1 = abs(P1[index]);
+    float scale2 = abs(P2[index]);
+    /* never divide by less than one */
+    scale1 = (scale1 < 1.0f) ? 1.0f : scale1;
+    scale2 = (scale2 < 1.0f) ? 1.0f : scale2;
+
+    P1[index] = P1[index]/scale1;
+    P2[index] = P2[index]/scale2;
+}
+/* Extrapolation step: R = P + multip2*(P - P_old) for both components.
+ * tkp1, tk and ImSize are accepted but not read by this kernel.
+ */
+__global__ void Rupd_dfunc2D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *R1, float *R2, float tkp1, float tk, float multip2, int N, int M, int ImSize)
+{
+    const int col = blockIdx.x*blockDim.x + threadIdx.x;
+    const int row = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if ((col >= N) || (row >= M)) return;
+
+    const int index = col + N*row;
+
+    R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+    R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+}
+/* Clamps negative values of an N x M image to zero, in place.
+ *
+ * Output    - image addressed as x + N*y
+ * N, M      - image extent along x and y
+ * num_total - number of elements in Output
+ *
+ * Threads are guarded per axis, like the other 2D kernels in this file. Guarding on the
+ * linear index alone let threads with xIndex >= N alias onto pixels of the following rows,
+ * so several threads touched the same element.
+ */
+__global__ void dTVnonneg2D_kernel(float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    if ((xIndex < N) && (yIndex < M)) {
+        const int index = xIndex + N*yIndex;
+
+        /* num_total is still honoured in case it is smaller than N*M */
+        if ((index < num_total) && (Output[index] < 0.0f)) Output[index] = 0.0f;
+    }
+}
+/* Copies an N x M image from Input to Output (both addressed as x + N*y).
+ *
+ * num_total is the number of elements in the buffers. Threads are guarded per axis so
+ * that each pixel is copied by exactly one thread; guarding on the linear index alone
+ * let threads with xIndex >= N alias onto pixels of the following rows.
+ */
+__global__ void dTVcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    if ((xIndex < N) && (yIndex < M)) {
+        const int index = xIndex + N*yIndex;
+
+        /* num_total is still honoured in case it is smaller than N*M */
+        if (index < num_total) Output[index] = Input[index];
+    }
+}
+
+/* Copies an N x M x Z volume from Input to Output (both addressed as (N*M)*z + x + N*y).
+ *
+ * num_total is the number of elements in the buffers. Threads are guarded per axis so
+ * that each voxel is copied by exactly one thread; guarding on the linear index alone
+ * let out-of-range threads alias onto other voxels.
+ */
+__global__ void dTVcopy_kernel3D(float *Input, float* Output, int N, int M, int Z, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i < N) && (j < M) && (k < Z)) {
+        const int index = (N*M)*k + i + N*j;
+
+        /* num_total is still honoured in case it is smaller than N*M*Z */
+        if (index < num_total) Output[index] = Input[index];
+    }
+}
+
+/* Element-wise difference Output = Input1 - Input2 for an N x M image (addressed as x + N*y).
+ *
+ * num_total is the number of elements in the buffers. Threads are guarded per axis so
+ * that each pixel is written by exactly one thread; guarding on the linear index alone
+ * let threads with xIndex >= N alias onto pixels of the following rows.
+ */
+__global__ void dTVResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+    const int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+    const int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+    if ((xIndex < N) && (yIndex < M)) {
+        const int index = xIndex + N*yIndex;
+
+        /* num_total is still honoured in case it is smaller than N*M */
+        if (index < num_total) Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/* Element-wise difference Output = Input1 - Input2 for an N x M x Z volume
+ * (addressed as (N*M)*z + x + N*y).
+ *
+ * num_total is the number of elements in the buffers. Threads are guarded per axis so
+ * that each voxel is written by exactly one thread; guarding on the linear index alone
+ * let out-of-range threads alias onto other voxels.
+ */
+__global__ void dTVResidCalc3D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int Z, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i < N) && (j < M) && (k < Z)) {
+        const int index = (N*M)*k + i + N*j;
+
+        /* num_total is still honoured in case it is smaller than N*M*Z */
+        if (index < num_total) Output[index] = Input1[index] - Input2[index];
+    }
+}
+
+/************************************************/
+/*****************3D modules*********************/
+/************************************************/
+/* Normalised forward-difference gradient of the 3D reference volume.
+ *
+ * Refd                   - reference volume, addressed as (N*M)*z + x + N*y
+ * Refd_x, Refd_y, Refd_z - outputs: gradient components divided by
+ *                          sqrt(gradX^2 + gradY^2 + gradZ^2 + eta^2)
+ * eta                    - smoothing constant entering the square root
+ * N, M, Z                - volume extent along x, y and z
+ * ImSize                 - not used by this kernel
+ *
+ * The neighbour beyond the last column/row/slice is taken as zero.
+ */
+__global__ void GradNorm_func3D_kernel(float *Refd, float *Refd_x, float *Refd_y, float *Refd_z, float eta, int N, int M, int Z, int ImSize)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    /* threads outside the N x M x Z extent have nothing to do */
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+
+    /* forward neighbours, zero past the last column/row/slice */
+    const float nextX = (col >= N-1) ? 0.0f : Refd[(N*M)*slc + (col+1) + N*row];
+    const float nextY = (row >= M-1) ? 0.0f : Refd[(N*M)*slc + col + N*(row+1)];
+    const float nextZ = (slc >= Z-1) ? 0.0f : Refd[(N*M)*(slc+1) + col + N*row];
+
+    const float gradX = nextX - Refd[index];
+    const float gradY = nextY - Refd[index];
+    const float gradZ = nextZ - Refd[index];
+
+    float magn = pow(gradX,2) + pow(gradY,2) + pow(gradZ,2);
+    magn = sqrt(magn + pow(eta,2));
+
+    Refd_x[index] = gradX/magn;
+    Refd_y[index] = gradY/magn;
+    Refd_z[index] = gradZ/magn;
+}
+
+/* Removes from the 3D vector field (R1,R2,R3) its component along (Refd_x,Refd_y,Refd_z),
+ * voxel by voxel: R <- R - <R,Refd> * Refd. The update is done in place; ImSize is not used.
+ */
+__global__ void ProjectVect_func3D_kernel(float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+
+    /* inner product of R with the reference direction at this voxel */
+    const float in_prod = R1[index]*Refd_x[index] + R2[index]*Refd_y[index] + R3[index]*Refd_z[index];
+
+    R1[index] = R1[index] - in_prod*Refd_x[index];
+    R2[index] = R2[index] - in_prod*Refd_y[index];
+    R3[index] = R3[index] - in_prod*Refd_z[index];
+}
+
+
+/* Writes D = Ad - lambda*(R1 + R2 + R3 - R1[x-1] - R2[y-1] - R3[z-1]) for every voxel of an
+ * N x M x Z volume. The backward neighbours are taken as zero in the first
+ * column/row/slice. ImSize is not used.
+ */
+__global__ void Obj_dfunc3D_kernel(float *Ad, float *D, float *R1, float *R2, float *R3, int N, int M, int Z, int ImSize, float lambda)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+
+    /* backward neighbours, zero before the first column/row/slice */
+    const float prevX = (col <= 0) ? 0.0f : R1[(N*M)*(slc) + (col-1) + N*row];
+    const float prevY = (row <= 0) ? 0.0f : R2[(N*M)*(slc) + col + N*(row-1)];
+    const float prevZ = (slc <= 0) ? 0.0f : R3[(N*M)*(slc-1) + col + N*row];
+
+    D[index] = Ad[index] - lambda*(R1[index] + R2[index] + R3[index] - prevX - prevY - prevZ);
+}
+
+/* Computes P = R + multip * proj(d), where d holds the differences D[v] - D[next v] along
+ * x, y and z, and proj removes the component of d along (Refd_x,Refd_y,Refd_z). The
+ * differences are set to zero in the last column/row/slice. ImSize is not used.
+ */
+__global__ void Grad_dfunc3D_kernel(float *P1, float *P2, float *P3, float *D, float *R1, float *R2, float *R3, float *Refd_x, float *Refd_y, float *Refd_z, int N, int M, int Z, int ImSize, float multip)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+
+    /* differences towards the next column/row/slice, zero on the far boundary */
+    float diffX = (col >= N-1) ? 0.0f : D[index] - D[(N*M)*(slc) + (col+1) + N*row];
+    float diffY = (row >= M-1) ? 0.0f : D[index] - D[(N*M)*(slc) + col + N*(row+1)];
+    float diffZ = (slc >= Z-1) ? 0.0f : D[index] - D[(N*M)*(slc+1) + col + N*row];
+
+    /* strip the part of the difference vector that is parallel to the reference field */
+    const float in_prod = diffX*Refd_x[index] + diffY*Refd_y[index] + diffZ*Refd_z[index];
+    diffX = diffX - in_prod*Refd_x[index];
+    diffY = diffY - in_prod*Refd_y[index];
+    diffZ = diffZ - in_prod*Refd_z[index];
+
+    P1[index] = R1[index] + multip*diffX;
+    P2[index] = R2[index] + multip*diffY;
+    P3[index] = R3[index] + multip*diffZ;
+}
+
+/* Isotropic projection: where P1^2 + P2^2 + P3^2 exceeds 1, all three components are
+ * multiplied by the reciprocal square root of that sum; other voxels are left untouched.
+ * ImSize is not used.
+ */
+__global__ void Proj_dfunc3D_iso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+    const float denom = pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2);
+
+    if (denom > 1.0f) {
+        const float sq_denom = 1.0f/sqrt(denom);
+        P1[index] = P1[index]*sq_denom;
+        P2[index] = P2[index]*sq_denom;
+        P3[index] = P3[index]*sq_denom;
+    }
+}
+
+/* Anisotropic projection: each of the three components is divided by max(|component|, 1)
+ * independently, which leaves values of magnitude below 1 unchanged. ImSize is not used.
+ */
+__global__ void Proj_dfunc3D_aniso_kernel(float *P1, float *P2, float *P3, int N, int M, int Z, int ImSize)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+
+    float scale1 = abs(P1[index]);
+    float scale2 = abs(P2[index]);
+    float scale3 = abs(P3[index]);
+    /* never divide by less than one */
+    scale1 = (scale1 < 1.0f) ? 1.0f : scale1;
+    scale2 = (scale2 < 1.0f) ? 1.0f : scale2;
+    scale3 = (scale3 < 1.0f) ? 1.0f : scale3;
+
+    P1[index] = P1[index]/scale1;
+    P2[index] = P2[index]/scale2;
+    P3[index] = P3[index]/scale3;
+}
+
+
+/* Extrapolation step: R = P + multip2*(P - P_old) for all three components.
+ * tkp1, tk and ImSize are accepted but not read by this kernel.
+ */
+__global__ void Rupd_dfunc3D_kernel(float *P1, float *P1_old, float *P2, float *P2_old, float *P3, float *P3_old, float *R1, float *R2, float *R3, float tkp1, float tk, float multip2, int N, int M, int Z, int ImSize)
+{
+    const int col = blockDim.x * blockIdx.x + threadIdx.x;
+    const int row = blockDim.y * blockIdx.y + threadIdx.y;
+    const int slc = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((col >= N) || (row >= M) || (slc >= Z)) return;
+
+    const int index = (N*M)*slc + col + N*row;
+
+    R1[index] = P1[index] + multip2*(P1[index] - P1_old[index]);
+    R2[index] = P2[index] + multip2*(P2[index] - P2_old[index]);
+    R3[index] = P3[index] + multip2*(P3[index] - P3_old[index]);
+}
+
+/* Clamps negative values of an N x M x Z volume to zero, in place.
+ *
+ * Output    - volume addressed as (N*M)*z + x + N*y
+ * N, M, Z   - volume extent along x, y and z
+ * num_total - number of elements in Output
+ *
+ * Threads are guarded per axis, like the other 3D kernels in this file. Guarding on the
+ * linear index alone let out-of-range threads alias onto other voxels, so several
+ * threads touched the same element.
+ */
+__global__ void dTVnonneg3D_kernel(float* Output, int N, int M, int Z, int num_total)
+{
+    const int i = blockDim.x * blockIdx.x + threadIdx.x;
+    const int j = blockDim.y * blockIdx.y + threadIdx.y;
+    const int k = blockDim.z * blockIdx.z + threadIdx.z;
+
+    if ((i < N) && (j < M) && (k < Z)) {
+        const int index = (N*M)*k + i + N*j;
+
+        /* num_total is still honoured in case it is smaller than N*M*Z */
+        if ((index < num_total) && (Output[index] < 0.0f)) Output[index] = 0.0f;
+    }
+}
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+
+////////////MAIN HOST FUNCTION ///////////////
+/* Host driver of the FGP-dTV regulariser (2D when dimZ <= 1, 3D otherwise).
+ *
+ * Input      - host buffer with the noisy image/volume (dimX*dimY[*dimZ] floats)
+ * InputRef   - host buffer with the reference image/volume of the same size
+ * Output     - host buffer receiving the regularised result
+ * lambdaPar  - regularisation parameter
+ * iter       - maximum number of iterations
+ * epsil      - tolerance of the stopping rule; 0 disables the rule
+ * eta        - smoothing constant used when normalising the reference gradient
+ * methodTV   - 0 selects the isotropic projection, any other value the anisotropic one
+ * nonneg     - non-zero clamps negative values of the update to zero in every iteration
+ * printM     - 1 prints the iteration at which the loop stopped
+ * dimX/Y/Z   - extent of the data
+ *
+ * Returns 0 on success and -1 when no CUDA device is usable or a checked CUDA call fails.
+ * NOTE: checkCudaErrors returns from this function immediately, so device buffers that
+ * were already allocated are not released on such an error path.
+ *
+ * When the stopping rule is active, the loop ends once the relative change
+ * ||u - u_prev|| / ||u|| has been below epsil in more than four iterations.
+ */
+extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+{
+    int deviceCount = 0; // number of devices
+    /* a failing query is treated like "no device": the count is meaningless in that case */
+    if ((cudaGetDeviceCount(&deviceCount) != cudaSuccess) || (deviceCount <= 0)) {
+        fprintf(stderr, "No CUDA devices found\n");
+        return -1;
+    }
+
+    int count = 0, i;
+    float re, multip,multip2;
+    float tk = 1.0f;
+    float tkp1=1.0f;
+
+    if (dimZ <= 1) {
+        /*2D version*/
+        int ImSize = dimX*dimY;
+        float *d_input=NULL, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *d_InputRef=NULL;
+
+        dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+            /* the first residual is taken against this buffer, so it must not hold garbage */
+            checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+
+        cudaMemset(P1, 0, ImSize*sizeof(float));
+        cudaMemset(P2, 0, ImSize*sizeof(float));
+        cudaMemset(P1_prev, 0, ImSize*sizeof(float));
+        cudaMemset(P2_prev, 0, ImSize*sizeof(float));
+        cudaMemset(R1, 0, ImSize*sizeof(float));
+        cudaMemset(R2, 0, ImSize*sizeof(float));
+        cudaMemset(InputRef_x, 0, ImSize*sizeof(float));
+        cudaMemset(InputRef_y, 0, ImSize*sizeof(float));
+
+        /******************** Run CUDA 2D kernel here ********************/
+        multip = (1.0f/(8.0f*lambdaPar));
+        /* calculate gradient vectors for the reference */
+        GradNorm_func2D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, eta, dimX, dimY, ImSize);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /*projects a 2D vector field R-1,2 onto the orthogonal complement of another 2D vector field InputRef_xy*/
+            ProjectVect_func2D_kernel<<<dimGrid,dimBlock>>>(R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc2D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, dimX, dimY, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                dTVnonneg2D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() ); }
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, d_update, R1, R2, InputRef_x, InputRef_y, dimX, dimY, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_dfunc2D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*isotropic TV*/
+            else Proj_dfunc2D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, ImSize); /*anisotropic TV*/
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_dfunc2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                /* P1_prev is free at this point (Rupd has consumed it, it is refreshed below),
+                   so it doubles as scratch space for the residual */
+                dTVResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        if (epsil != 0.0f) cudaFree(d_update_prev);
+        cudaFree(P1);
+        cudaFree(P2);
+        cudaFree(P1_prev);
+        cudaFree(P2_prev);
+        cudaFree(R1);
+        cudaFree(R2);
+
+        cudaFree(d_InputRef);
+        cudaFree(InputRef_x);
+        cudaFree(InputRef_y);
+    }
+    else {
+        /*3D version*/
+        int ImSize = dimX*dimY*dimZ;
+        float *d_input=NULL, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P3=NULL, *P1_prev=NULL, *P2_prev=NULL, *P3_prev=NULL, *R1=NULL, *R2=NULL, *R3=NULL, *InputRef_x=NULL, *InputRef_y=NULL, *InputRef_z=NULL, *d_InputRef=NULL;
+
+        dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
+        dim3 dimGrid(idivup(dimX,BLKXSIZE), idivup(dimY,BLKYSIZE),idivup(dimZ,BLKZSIZE));
+
+        /*allocate space for images on device*/
+        checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+        if (epsil != 0.0f) {
+            checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
+            /* the first residual is taken against this buffer, so it must not hold garbage */
+            checkCudaErrors( cudaMemset(d_update_prev, 0, ImSize*sizeof(float)) );
+        }
+        checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P1_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P2_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&P3_prev,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R1,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R2,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&R3,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&d_InputRef,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_x,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_y,ImSize*sizeof(float)) );
+        checkCudaErrors( cudaMalloc((void**)&InputRef_z,ImSize*sizeof(float)) );
+
+        checkCudaErrors( cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+        checkCudaErrors( cudaMemcpy(d_InputRef,InputRef,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+
+        cudaMemset(P1, 0, ImSize*sizeof(float));
+        cudaMemset(P2, 0, ImSize*sizeof(float));
+        cudaMemset(P3, 0, ImSize*sizeof(float));
+        cudaMemset(P1_prev, 0, ImSize*sizeof(float));
+        cudaMemset(P2_prev, 0, ImSize*sizeof(float));
+        cudaMemset(P3_prev, 0, ImSize*sizeof(float));
+        cudaMemset(R1, 0, ImSize*sizeof(float));
+        cudaMemset(R2, 0, ImSize*sizeof(float));
+        cudaMemset(R3, 0, ImSize*sizeof(float));
+        cudaMemset(InputRef_x, 0, ImSize*sizeof(float));
+        cudaMemset(InputRef_y, 0, ImSize*sizeof(float));
+        cudaMemset(InputRef_z, 0, ImSize*sizeof(float));
+
+        /********************** Run CUDA 3D kernel here ********************/
+        multip = (1.0f/(26.0f*lambdaPar));
+        /* calculate gradient vectors for the reference */
+        GradNorm_func3D_kernel<<<dimGrid,dimBlock>>>(d_InputRef, InputRef_x, InputRef_y, InputRef_z, eta, dimX, dimY, dimZ, ImSize);
+        checkCudaErrors( cudaDeviceSynchronize() );
+        checkCudaErrors(cudaPeekAtLastError() );
+
+        /* The main kernel */
+        for (i = 0; i < iter; i++) {
+
+            /*projects a 3D vector field R-1,2,3 onto the orthogonal complement of another 3D vector field InputRef_xyz*/
+            ProjectVect_func3D_kernel<<<dimGrid,dimBlock>>>(R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* computing the gradient of the objective function */
+            Obj_dfunc3D_kernel<<<dimGrid,dimBlock>>>(d_input, d_update, R1, R2, R3, dimX, dimY, dimZ, ImSize, lambdaPar);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (nonneg != 0) {
+                dTVnonneg3D_kernel<<<dimGrid,dimBlock>>>(d_update, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() ); }
+
+            /*Taking a step towards minus of the gradient*/
+            Grad_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, d_update, R1, R2, R3, InputRef_x, InputRef_y, InputRef_z, dimX, dimY, dimZ, ImSize, multip);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            /* projection step */
+            if (methodTV == 0) Proj_dfunc3D_iso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* isotropic kernel */
+            else Proj_dfunc3D_aniso_kernel<<<dimGrid,dimBlock>>>(P1, P2, P3, dimX, dimY, dimZ, ImSize); /* anisotropic kernel */
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tkp1 = (1.0f + sqrt(1.0f + 4.0f*tk*tk))*0.5f;
+            multip2 = ((tk-1.0f)/tkp1);
+
+            Rupd_dfunc3D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, P3, P3_prev, R1, R2, R3, tkp1, tk, multip2, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            if (epsil != 0.0f) {
+                /* calculate norm - stopping rules using the Thrust library */
+                /* P1_prev is free at this point (Rupd has consumed it, it is refreshed below),
+                   so it doubles as scratch space for the residual */
+                dTVResidCalc3D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+
+                thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+                float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+                thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+                float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+
+                re = (reduction/reduction2);
+                if (re < epsil) count++;
+                if (count > 4) break;
+
+                dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
+                checkCudaErrors( cudaDeviceSynchronize() );
+                checkCudaErrors(cudaPeekAtLastError() );
+            }
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(P3, P3_prev, dimX, dimY, dimZ, ImSize);
+            checkCudaErrors( cudaDeviceSynchronize() );
+            checkCudaErrors(cudaPeekAtLastError() );
+
+            tk = tkp1;
+        }
+        if (printM == 1) printf("FGP-dTV iterations stopped at iteration %i \n", i);
+        /***************************************************************/
+        //copy result matrix from device to host memory
+        cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
+
+        cudaFree(d_input);
+        cudaFree(d_update);
+        if (epsil != 0.0f) cudaFree(d_update_prev);
+        cudaFree(P1);
+        cudaFree(P2);
+        cudaFree(P3);
+        cudaFree(P1_prev);
+        cudaFree(P2_prev);
+        cudaFree(P3_prev);
+        cudaFree(R1);
+        cudaFree(R2);
+        cudaFree(R3);
+        cudaFree(InputRef_x);
+        cudaFree(InputRef_y);
+        cudaFree(InputRef_z);
+        cudaFree(d_InputRef);
+    }
+    //cudaDeviceReset();
+    return 0;
+}
diff --git a/src/Core/regularisers_GPU/dTV_FGP_GPU_core.h b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.h
new file mode 100644
index 0000000..f9281e8
--- /dev/null
+++ b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.h
@@ -0,0 +1,9 @@
+#ifndef _dTV_FGP_GPU_
+#define _dTV_FGP_GPU_
+
+#include "CCPiDefines.h"
+#include <memory.h>
+
+/* Host entry point of the GPU FGP-dTV regulariser, exported with C linkage.
+ * Input and InputRef are the noisy data and the reference of the same dimensions, Output
+ * receives the result; dimZ <= 1 selects the 2D code path. Returns 0 on success and -1
+ * when no CUDA device is found or a checked CUDA call fails. */
+extern "C" CCPI_EXPORT int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iter, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+
+#endif
diff --git a/src/Core/regularisers_GPU/shared.h b/src/Core/regularisers_GPU/shared.h
new file mode 100644
index 0000000..fe98cd6
--- /dev/null
+++ b/src/Core/regularisers_GPU/shared.h
@@ -0,0 +1,42 @@
+/*shared macros*/
+
+
+/* Evaluates a CUDA runtime call once and tests its cudaError_t result.
+ * On anything other than cudaSuccess it writes file, line, error code and the
+ * cudaGetErrorString text to stderr and executes `return -1;` in the CALLING function,
+ * so it may only be used inside functions that return int.
+ * The macro expands to a bare { ... } block rather than do { ... } while (0): with the
+ * usual trailing semicolon it cannot be the unbraced body of an `if` that has an `else`. */
+#define CHECK(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ return -1; \
+ } \
+}
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error.
+// Its expansion is the same as CHECK: the call is evaluated once and, on failure, the message is
+// written to stderr and `return -1;` is executed in the calling function (which must return int).
+// Anything the caller allocated before the failing call is therefore not released by this macro.
+#define checkCudaErrors(call) \
+{ \
+ const cudaError_t error = call; \
+ if (error != cudaSuccess) \
+ { \
+ fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
+ fprintf(stderr, "code: %d, reason: %s\n", error, \
+ cudaGetErrorString(error)); \
+ return -1; \
+ } \
+}
+/*#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+ file, line, (int)err, cudaGetErrorString(err));
+ return;
+ }
+}
+*/
+
diff --git a/src/Matlab/CMakeLists.txt b/src/Matlab/CMakeLists.txt
new file mode 100755
index 0000000..b97f845
--- /dev/null
+++ b/src/Matlab/CMakeLists.txt
@@ -0,0 +1,147 @@
+project(regulariserMatlab)
+
+
+find_package(Matlab REQUIRED COMPONENTS MAIN_PROGRAM MX_LIBRARY ENG_LIBRARY )
+
+
+
+#C:\Users\ofn77899\Documents\Projects\CCPi\GitHub\CCPi-FISTA_Reconstruction\Core\regularisers_CPU
+# matlab_add_mex(
+ # NAME CPU_ROF
+ # SRC
+ # ${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
+ # LINK_TO cilreg ${Matlab_LIBRARIES}
+ # )
+
+# target_include_directories(CPU_ROF
+ # PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+ # ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+ # ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+ # ${CMAKE_SOURCE_DIR}/Core/
+ # ${MATLAB_INCLUDE_DIR})
+
+ # matlab_add_mex(
+ # NAME CPU_TNV
+ # SRC
+ # ${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/TNV.c
+ # LINK_TO cilreg ${Matlab_LIBRARIES}
+ # )
+
+# target_include_directories(CPU_TNV
+ # PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+ # ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+ # ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+ # ${CMAKE_SOURCE_DIR}/Core/
+ # ${MATLAB_INCLUDE_DIR})
+
+#set (CPU_MEX_FILES "regularisers_CPU/TNV.c;regularisers_CPU/ROF_TV.c")
+#set (MEX_TARGETS "CPU_TNV;CPU_ROF")
+#list(APPEND MEX_TARGETS "CPU_TNV")
+#list(APPEND MEX_TARGETS "CPU_ROF")
+
+# Collect every CPU MEX gateway source; one MEX target is created per file.
+file(GLOB CPU_MEX_FILES
+ "${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/*.c"
+ #"${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_GPU/*.c"
+)
+
+#message("CPU_MEX_FILES " ${CPU_MEX_FILES})
+
+# Report the real number of sources (the count used to be decremented before printing).
+list(LENGTH CPU_MEX_FILES num)
+message("found ${num} files")
+
+# Names of all MEX targets created below; the MatlabWrapper target depends on them.
+set(CPU_MEX_TARGETS "")
+
+# Iterate over the list itself so that an empty glob result simply creates no targets
+# (the former index loop over RANGE 0..num-1 had no valid form for an empty list).
+foreach(current_file_name IN LISTS CPU_MEX_FILES)
+ get_filename_component(current_file ${current_file_name} NAME)
+ # Target name = file name without directory and extension, e.g. ROF_TV.c -> ROF_TV.
+ # NAME_WE replaces the regex "(.+).c" (unescaped dot) and the test of its string
+ # result with EQUAL, which is CMake's numeric comparison operator.
+ get_filename_component(current_target ${current_file_name} NAME_WE)
+ message("matlab_add_mex target " ${current_file} " and " ${current_target})
+ # Only the gateway file is compiled; the core sources stay commented out (target links cilreg).
+ matlab_add_mex(
+ NAME ${current_target}
+ SRC
+ ${current_file_name}
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/FGP_TV_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/SB_TV_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/TGV_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/Diffusion_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/Diffus4th_order_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/LLT_ROF_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/ROF_TV_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/FGP_dTV_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/TNV_core.c
+ #${CMAKE_SOURCE_DIR}/Core/regularisers_CPU/utils.c
+ #${CMAKE_SOURCE_DIR}/Core/inpainters_CPU/Diffusion_Inpaint_core.c
+ #${CMAKE_SOURCE_DIR}/Core/inpainters_CPU/NonlocalMarching_Inpaint_core.c
+ LINK_TO cilreg ${Matlab_LIBRARIES}
+ )
+
+target_include_directories(${current_target}
+ PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+ ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+ ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+ ${CMAKE_SOURCE_DIR}/Core/
+ ${MATLAB_INCLUDE_DIR})
+ set_property(TARGET ${current_target} PROPERTY C_STANDARD 99)
+ list(APPEND CPU_MEX_TARGETS ${current_target})
+ INSTALL(TARGETS ${current_target} DESTINATION "${MATLAB_DEST}")
+endforeach()
+
+# Convenience target that depends on every CPU MEX wrapper created above.
+add_custom_target(MatlabWrapper DEPENDS ${CPU_MEX_TARGETS})
+
+# GPU MEX wrappers are created only when BUILD_CUDA is set and find_package(CUDA) succeeds.
+if (BUILD_CUDA)
+ find_package(CUDA)
+ if (CUDA_FOUND)
+ file(GLOB GPU_MEX_FILES
+ "${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_GPU/*.cpp"
+ )
+
+ list(LENGTH GPU_MEX_FILES num)
+message("number of GPU files " ${num})
+
+ # Names of all GPU MEX targets created below; MatlabWrapperGPU depends on them.
+ set(GPU_MEX_TARGETS "")
+
+ # Iterate over the list itself so that an empty glob result creates no targets
+ # (the former index loop over RANGE num-1 had no valid form for an empty list).
+ foreach(current_file_name IN LISTS GPU_MEX_FILES)
+ get_filename_component(current_file ${current_file_name} NAME)
+ # Target name = file name without directory and extension,
+ # e.g. ROF_TV_GPU.cpp -> ROF_TV_GPU. NAME_WE replaces the regex "(.+).c"
+ # and the numeric EQUAL test that was applied to its string result.
+ get_filename_component(current_target ${current_file_name} NAME_WE)
+ message("matlab_add_mex target " ${current_file} " and " ${current_target})
+ message("matlab_add_mex " ${current_target})
+ # Only the gateway file is compiled here; the target links against cilregcuda.
+ matlab_add_mex(
+ NAME ${current_target}
+ SRC
+ ${current_file_name}
+ LINK_TO cilregcuda ${Matlab_LIBRARIES}
+ )
+
+ target_include_directories(${current_target}
+ PUBLIC ${CMAKE_SOURCE_DIR}/Core/regularisers_CPU
+ ${CMAKE_SOURCE_DIR}/Core/regularisers_GPU
+ ${CMAKE_SOURCE_DIR}/Core/inpainters_CPU
+ ${CMAKE_SOURCE_DIR}/Core/
+ ${MATLAB_INCLUDE_DIR})
+
+ list(APPEND GPU_MEX_TARGETS ${current_target})
+ INSTALL(TARGETS ${current_target} DESTINATION "${MATLAB_DEST}")
+ endforeach()
+
+ # Convenience target that depends on every GPU MEX wrapper created above.
+ add_custom_target(MatlabWrapperGPU DEPENDS ${GPU_MEX_TARGETS})
+
+ endif()
+endif()
diff --git a/src/Matlab/mex_compile/compileCPU_mex_Linux.m b/src/Matlab/mex_compile/compileCPU_mex_Linux.m
new file mode 100644
index 0000000..72a828e
--- /dev/null
+++ b/src/Matlab/mex_compile/compileCPU_mex_Linux.m
@@ -0,0 +1,81 @@
+% execute this mex file on Linux in Matlab once
+
+fsep = '/';
+% NOTE(review): these relative paths depend on the current folder; from src/Matlab/mex_compile, '../../../Core' points above src/ - confirm.
+pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_CPU'], 1i);
+pathcopyFrom1 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'CCPiDefines.h'], 1i);
+pathcopyFrom2 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'inpainters_CPU'], 1i);
+% Copy the core sources and CCPiDefines.h into regularisers_CPU, where the mex commands below are run.
+copyfile(pathcopyFrom, 'regularisers_CPU');
+copyfile(pathcopyFrom1, 'regularisers_CPU');
+copyfile(pathcopyFrom2, 'regularisers_CPU');
+% All compile commands below are executed from inside regularisers_CPU.
+cd regularisers_CPU
+% Destination of the compiled MEX binaries: ../installed/
+Pathmove = sprintf(['..' fsep 'installed' fsep], 1i);
+
+fprintf('%s \n', '<<<<<<<<<<<Compiling CPU regularisers>>>>>>>>>>>>>');
+
+fprintf('%s \n', 'Compiling ROF-TV...');
+mex ROF_TV.c ROF_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('ROF_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling FGP-TV...');
+mex FGP_TV.c FGP_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('FGP_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling SB-TV...');
+mex SB_TV.c SB_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('SB_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling dFGP-TV...');
+mex FGP_dTV.c FGP_dTV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('FGP_dTV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TNV...');
+mex TNV.c TNV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TNV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLinear Diffusion...');
+mex NonlDiff.c Diffusion_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('NonlDiff.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
+mex Diffusion_4thO.c Diffus4th_order_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('Diffusion_4thO.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TGV...');
+mex TGV.c TGV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TGV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling ROF-LLT...');
+mex LLT_ROF.c LLT_ROF_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('LLT_ROF.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLocal-TV...');
+mex PatchSelect.c PatchSelect_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('Nonlocal_TV.mex*',Pathmove);
+movefile('PatchSelect.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling additional tools...');
+mex TV_energy.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('TV_energy.mex*',Pathmove);
+
+%############Inpainters##############%
+fprintf('%s \n', 'Compiling Nonlinear/Linear diffusion inpainting...');
+mex NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('NonlDiff_Inp.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Nonlocal marching method for inpainting...');
+mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c CFLAGS="\$CFLAGS -fopenmp -Wall -std=c99" LDFLAGS="\$LDFLAGS -fopenmp"
+movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
+
+delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* LLT_ROF_core* CCPiDefines.h
+delete PatchSelect_core* Nonlocal_TV_core*
+delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
+fprintf('%s \n', '<<<<<<< Regularisers successfully compiled! >>>>>>>');
+
+pathA2 = sprintf(['..' fsep '..' fsep], 1i);
+cd(pathA2);
+cd demos
diff --git a/src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m b/src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
new file mode 100644
index 0000000..6f7541c
--- /dev/null
+++ b/src/Matlab/mex_compile/compileCPU_mex_WINDOWS.m
@@ -0,0 +1,135 @@
+% execute this mex file on Windows in Matlab once
+
+% >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+% I've been able to compile on Windows 7 with MinGW and Matlab 2016b, however,
+% not sure if openmp is enabled after the compilation.
+
+% Here I present two ways how software can be compiled, if you have some
+% other suggestions/remarks please contact me at dkazanc@hotmail.com
+% >>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+
+fsep = '/';
+
+pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_CPU'], 1i);
+pathcopyFrom1 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'CCPiDefines.h'], 1i);
+pathcopyFrom2 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'inpainters_CPU'], 1i);
+
+copyfile(pathcopyFrom, 'regularisers_CPU');
+copyfile(pathcopyFrom1, 'regularisers_CPU');
+copyfile(pathcopyFrom2, 'regularisers_CPU');
+
+cd regularisers_CPU
+
+Pathmove = sprintf(['..' fsep 'installed' fsep], 1i);
+
+fprintf('%s \n', '<<<<<<<<<<<Compiling CPU regularisers>>>>>>>>>>>>>');
+
+fprintf('%s \n', 'Compiling ROF-TV...');
+mex ROF_TV.c ROF_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('ROF_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling FGP-TV...');
+mex FGP_TV.c FGP_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('FGP_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling SB-TV...');
+mex SB_TV.c SB_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('SB_TV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling dFGP-TV...');
+mex FGP_dTV.c FGP_dTV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('FGP_dTV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TNV...');
+mex TNV.c TNV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TNV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLinear Diffusion...');
+mex NonlDiff.c Diffusion_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('NonlDiff.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
+mex Diffusion_4thO.c Diffus4th_order_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('Diffusion_4thO.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TGV...');
+mex TGV.c TGV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TGV.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling ROF-LLT...');
+mex LLT_ROF.c LLT_ROF_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('LLT_ROF.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLocal-TV...');
+mex PatchSelect.c PatchSelect_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+mex Nonlocal_TV.c Nonlocal_TV_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('Nonlocal_TV.mex*',Pathmove);
+movefile('PatchSelect.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling additional tools...');
+mex TV_energy.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('TV_energy.mex*',Pathmove);
+
+%############Inpainters##############%
+fprintf('%s \n', 'Compiling Nonlinear/Linear diffusion inpainting...');
+mex NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('NonlDiff_Inp.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Nonlocal marching method for inpaiting...');
+mex NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c COMPFLAGS="\$COMPFLAGS -fopenmp -Wall -std=c99"
+movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
+
+
+%%
+%%% The second approach to compile using TDM-GCC which follows this
+%%% discussion:
+%%% https://uk.mathworks.com/matlabcentral/answers/279171-using-mingw-compiler-and-open-mp#comment_359122
+%%% 1. Install TDM-GCC independently from http://tdm-gcc.tdragon.net/ (I installed 5.1.0)
+%%% Install openmp version: http://sourceforge.net/projects/tdm-gcc/files/TDM-GCC%205%20series/5.1.0-tdm64-1/gcc-5.1.0-tdm64-1-openmp.zip/download
+%%% 2. Link to libgomp.a in that installation when compiling your mex file.
+
+%%% assuming you unzipped TDM GCC (OpenMP) in the folder TDMGCC on the C drive, uncomment
+%%% the lines below
+% fprintf('%s \n', 'Compiling CPU regularisers...');
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" ROF_TV.c ROF_TV_core.c utils.c
+% movefile('ROF_TV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" FGP_TV.c FGP_TV_core.c utils.c
+% movefile('FGP_TV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" SB_TV.c SB_TV_core.c utils.c
+% movefile('SB_TV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" FGP_dTV.c FGP_dTV_core.c utils.c
+% movefile('FGP_dTV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TNV.c TNV_core.c utils.c
+% movefile('TNV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff.c Diffusion_core.c utils.c
+% movefile('NonlDiff.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Diffusion_4thO.c Diffus4th_order_core.c utils.c
+% movefile('Diffusion_4thO.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TGV.c TGV_core.c utils.c
+% movefile('TGV.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" LLT_ROF.c LLT_ROF_core.c utils.c
+% movefile('LLT_ROF.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" PatchSelect.c PatchSelect_core.c utils.c
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" Nonlocal_TV.c Nonlocal_TV_core.c utils.c
+% movefile('Nonlocal_TV.mex*',Pathmove);
+% movefile('PatchSelect.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" TV_energy.c utils.c
+% movefile('TV_energy.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlDiff_Inp.c Diffusion_Inpaint_core.c utils.c
+% movefile('NonlDiff_Inp.mex*',Pathmove);
+% mex C:\TDMGCC\lib\gcc\x86_64-w64-mingw32\5.1.0\libgomp.a CXXFLAGS="$CXXFLAGS -std=c++11 -fopenmp" NonlocalMarching_Inpaint.c NonlocalMarching_Inpaint_core.c utils.c
+% movefile('NonlocalMarching_Inpaint.mex*',Pathmove);
+% Remove the core sources that were copied in for compilation; keep this list in
+% step with the Linux script (LLT_ROF_core* was missing here and was left behind).
+delete SB_TV_core* ROF_TV_core* FGP_TV_core* FGP_dTV_core* TNV_core* utils* Diffusion_core* Diffus4th_order_core* TGV_core* LLT_ROF_core* CCPiDefines.h
+delete PatchSelect_core* Nonlocal_TV_core*
+delete Diffusion_Inpaint_core* NonlocalMarching_Inpaint_core*
+fprintf('%s \n', 'Regularisers successfully compiled!');
+
+
+%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%pathA2 = sprintf(['..' fsep '..' fsep], 1i);
+%cd(pathA2);
+%cd demos
diff --git a/src/Matlab/mex_compile/compileGPU_mex.m b/src/Matlab/mex_compile/compileGPU_mex.m
new file mode 100644
index 0000000..dd1475c
--- /dev/null
+++ b/src/Matlab/mex_compile/compileGPU_mex.m
@@ -0,0 +1,74 @@
+% execute this mex file in Matlab once
+
+%>>>>>>>>>>>>>>>>>Important<<<<<<<<<<<<<<<<<<<
+% In order to compile CUDA modules one needs to have nvcc-compiler
+% installed (see CUDA SDK), check it under MATLAB with !nvcc --version
+
+% In the code below we provide a full explicit path to the nvcc compiler
+% ! paths to matlab and CUDA sdk can be different, modify accordingly !
+
+% Tested on Ubuntu 18.04/MATLAB 2016b/cuda10.0/gcc7.3
+
+% Installation HAS NOT been tested on Windows, please use the CMake build or
+% modify the code below accordingly
+fsep = '/';
+
+pathcopyFrom = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'regularisers_GPU'], 1i);
+pathcopyFrom1 = sprintf(['..' fsep '..' fsep '..' fsep 'Core' fsep 'CCPiDefines.h'], 1i);
+
+copyfile(pathcopyFrom, 'regularisers_GPU');
+copyfile(pathcopyFrom1, 'regularisers_GPU');
+
+cd regularisers_GPU
+
+Pathmove = sprintf(['..' fsep 'installed' fsep], 1i);
+
+fprintf('%s \n', '<<<<<<<<<<<Compiling GPU regularisers (CUDA)>>>>>>>>>>>>>');
+
+fprintf('%s \n', 'Compiling ROF-TV...');
+!/usr/local/cuda/bin/nvcc -O0 -c TV_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu ROF_TV_GPU.cpp TV_ROF_GPU_core.o
+movefile('ROF_TV_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling FGP-TV...');
+!/usr/local/cuda/bin/nvcc -O0 -c TV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_TV_GPU.cpp TV_FGP_GPU_core.o
+movefile('FGP_TV_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling SB-TV...');
+!/usr/local/cuda/bin/nvcc -O0 -c TV_SB_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu SB_TV_GPU.cpp TV_SB_GPU_core.o
+movefile('SB_TV_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling TGV...');
+!/usr/local/cuda/bin/nvcc -O0 -c TGV_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu TGV_GPU.cpp TGV_GPU_core.o
+movefile('TGV_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling dFGP-TV...');
+!/usr/local/cuda/bin/nvcc -O0 -c dTV_FGP_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu FGP_dTV_GPU.cpp dTV_FGP_GPU_core.o
+movefile('FGP_dTV_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling NonLinear Diffusion...');
+!/usr/local/cuda/bin/nvcc -O0 -c NonlDiff_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu NonlDiff_GPU.cpp NonlDiff_GPU_core.o
+movefile('NonlDiff_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling Anisotropic diffusion of higher order...');
+!/usr/local/cuda/bin/nvcc -O0 -c Diffus_4thO_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu Diffusion_4thO_GPU.cpp Diffus_4thO_GPU_core.o
+movefile('Diffusion_4thO_GPU.mex*',Pathmove);
+
+fprintf('%s \n', 'Compiling ROF-LLT...');
+!/usr/local/cuda/bin/nvcc -O0 -c LLT_ROF_GPU_core.cu -Xcompiler -fPIC -I~/SOFT/MATLAB9/extern/include/
+mex -g -I/usr/local/cuda-10.0/include -L/usr/local/cuda-10.0/lib64 -lcudart -lcufft -lmwgpu LLT_ROF_GPU.cpp LLT_ROF_GPU_core.o
+movefile('LLT_ROF_GPU.mex*',Pathmove);
+
+
+delete TV_ROF_GPU_core* TV_FGP_GPU_core* TV_SB_GPU_core* dTV_FGP_GPU_core* NonlDiff_GPU_core* Diffus_4thO_GPU_core* TGV_GPU_core* LLT_ROF_GPU_core* CCPiDefines.h
+fprintf('%s \n', 'All successfully compiled!');
+
+pathA2 = sprintf(['..' fsep '..' fsep], 1i);
+cd(pathA2);
+cd demos \ No newline at end of file
diff --git a/src/Matlab/mex_compile/installed/MEXed_files_location.txt b/src/Matlab/mex_compile/installed/MEXed_files_location.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/Matlab/mex_compile/installed/MEXed_files_location.txt
diff --git a/src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c b/src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
new file mode 100644
index 0000000..66ea9be
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/Diffusion_4thO.c
@@ -0,0 +1,77 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffus4th_order_core.h"
+
+/* C-OMP implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Edge-preserving parameter (sigma) [REQUIRED]
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL, default 300]
+ * 5. tau - time-marching step for the explicit scheme [OPTIONAL, default 0.015]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+/* MEX gateway for the 4th-order diffusion filter: checks the Matlab arguments,
+ * allocates the single-precision output and calls Diffus4th_CPU_main. */
+void mexFunction(
+ int nlhs, mxArray *plhs[],
+ int nrhs, const mxArray *prhs[])
+{
+ int number_of_dims, iter_numb;
+ mwSize dimX, dimY, dimZ;
+ const mwSize *dim_array;
+ float *Input, *Output=NULL, lambda, tau, sigma;
+
+ /* Check the argument count first: prhs[] must not be read beyond nrhs entries */
+ if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant");
+ if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+ dim_array = mxGetDimensions(prhs[0]);
+ number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+ /* Only 2D/3D data is supported; any other rank used to leave Output as NULL */
+ if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+ /*Handling Matlab input data*/
+ Input = (float *) mxGetData(prhs[0]);
+ lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+ sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+ iter_numb = 300; /* iterations number */
+ tau = 0.01; /* marching step parameter; NOTE(review): the header comment states a default of 0.015 - confirm */
+ if (nrhs >= 4) iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+ if (nrhs == 5) tau = (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+ /* dim_array holds number_of_dims entries, so index 2 is read only for 3D data */
+ dimX = dim_array[0]; dimY = dim_array[1];
+ dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+ /* output image/volume: single precision, same shape as the input (mxGetData, as the data is not double) */
+ Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+ Diffus4th_CPU_main(Input, Output, lambda, sigma, iter_numb, tau, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c b/src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
new file mode 100644
index 0000000..642362f
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/FGP_TV.c
@@ -0,0 +1,97 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "FGP_TV_core.h"
+
+/* C-OMP implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+
+/* MEX gateway for FGP-TV: checks the Matlab arguments, allocates the
+ * single-precision output and calls TV_FGP_CPU_main. */
+void mexFunction(
+ int nlhs, mxArray *plhs[],
+ int nrhs, const mxArray *prhs[])
+{
+ int number_of_dims, iter, methTV, printswitch, nonneg;
+ mwSize dimX, dimY, dimZ;
+ const mwSize *dim_array;
+ float *Input, *Output=NULL, lambda, epsil;
+
+ /* Check the argument count first: prhs[] must not be read beyond nrhs entries */
+ if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+ if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+ number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+ dim_array = mxGetDimensions(prhs[0]);
+ /* Only 2D/3D data is supported; any other rank used to leave Output as NULL */
+ if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+ /*Handling Matlab input data*/
+ Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+ lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+ iter = 300; /* default iterations number */
+ epsil = 0.0001; /* default tolerance constant */
+ methTV = 0; /* default isotropic TV penalty */
+ nonneg = 0; /* default nonnegativity switch, off - 0 */
+ printswitch = 0; /*default print is switched, off - 0 */
+
+ if (nrhs >= 3) iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+ if (nrhs >= 4) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */
+ if (nrhs >= 5) {
+ char *penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+ /* mxArrayToString yields NULL when the argument is not a char array; reject it before strcmp */
+ if (penalty_type == NULL) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+ if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+ if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
+ mxFree(penalty_type);
+ }
+ if (nrhs >= 6) {
+ nonneg = (int) mxGetScalar(prhs[5]);
+ if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+ }
+ if (nrhs == 7) {
+ printswitch = (int) mxGetScalar(prhs[6]);
+ if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+ }
+
+ /* dim_array holds number_of_dims entries, so index 2 is read only for 3D data */
+ dimX = dim_array[0]; dimY = dim_array[1];
+ dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+ Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+ /* running the function */
+ TV_FGP_CPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c b/src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
new file mode 100644
index 0000000..1a0c070
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/FGP_dTV.c
@@ -0,0 +1,114 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "FGP_dTV_core.h"
+
+/* C-OMP implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+
+
+/* MEX gateway for FGP-dTV: checks the Matlab arguments (image and reference must be
+ * single precision with identical sizes), allocates the output and calls dTV_FGP_CPU_main. */
+void mexFunction(
+ int nlhs, mxArray *plhs[],
+ int nrhs, const mxArray *prhs[])
+{
+ int number_of_dims, iter, methTV, printswitch, nonneg;
+ mwSize dimX, dimY, dimZ;
+ const mwSize *dim_array;
+ const mwSize *dim_array2;
+ float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
+
+ /* Check the argument count first: prhs[] must not be read beyond nrhs entries */
+ if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+ if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+ if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The reference image must be in a single precision"); }
+
+ number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+ dim_array = mxGetDimensions(prhs[0]);
+ dim_array2 = mxGetDimensions(prhs[1]);
+ /* Only 2D/3D data is supported; any other rank used to leave Output as NULL */
+ if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+ /* The reference must have the same rank, otherwise dim_array2 is shorter than dim_array */
+ if (mxGetNumberOfDimensions(prhs[1]) != (mwSize) number_of_dims) mexErrMsgTxt("The input image and the reference have different dimensionalities");
+
+ /*Handling Matlab input data*/
+ Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+ InputRef = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
+ lambda = (float) mxGetScalar(prhs[2]); /* regularization parameter */
+ iter = 300; /* default iterations number */
+ epsil = 0.0001; /* default tolerance constant */
+ eta = 0.01; /* default smoothing constant */
+ methTV = 0; /* default isotropic TV penalty */
+ nonneg = 0; /* default nonnegativity switch, off - 0 */
+ printswitch = 0; /*default print is switched, off - 0 */
+
+ /* dim_array holds number_of_dims entries, so index 2 is read only for 3D data */
+ dimX = dim_array[0]; dimY = dim_array[1];
+ dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+ if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
+ if (number_of_dims == 3) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");}
+
+ if (nrhs >= 4) iter = (int) mxGetScalar(prhs[3]); /* iterations number */
+ if (nrhs >= 5) epsil = (float) mxGetScalar(prhs[4]); /* tolerance constant */
+ if (nrhs >= 6) eta = (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
+ if (nrhs >= 7) {
+ char *penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+ /* mxArrayToString yields NULL when the argument is not a char array; reject it before strcmp */
+ if (penalty_type == NULL) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+ if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+ if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
+ mxFree(penalty_type);
+ }
+ if (nrhs >= 8) {
+ nonneg = (int) mxGetScalar(prhs[7]);
+ if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+ }
+ if (nrhs == 9) {
+ printswitch = (int) mxGetScalar(prhs[8]);
+ if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+ }
+
+ /* output image/volume: single precision, same shape as the input (mxGetData, as the data is not double) */
+ Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+ /* running the function */
+ dTV_FGP_CPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c b/src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
new file mode 100644
index 0000000..ab45446
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/LLT_ROF.c
@@ -0,0 +1,82 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "LLT_ROF_core.h"
+
+/* C-OMP implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+*
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
+* lambdaLLT starting with smaller values.
+*
+* Input Parameters:
+* 1. U0 - original noise image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. iter - iterations number (for both models) [OPTIONAL, default 250]
+* 5. tau - time-marching step [OPTIONAL, default 0.0025]
+*
+* Output:
+* Filtered/regularised image
+*
+* References:
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway to LLT_ROF_CPU_main: validates the call, applies the defaults of
+     * the optional arguments and allocates an output of the same size as the input. */
+    int number_of_dims, iterationsNumb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambdaROF, lambdaLLT, tau;
+
+    /* the argument count is verified before any element of prhs is dereferenced */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter (ROF), Regularisation parameter (LTT), iterations number, time-marching parameter");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    lambdaROF = (float) mxGetScalar(prhs[1]); /* ROF regularization parameter */
+    lambdaLLT = (float) mxGetScalar(prhs[2]); /* LLT regularization parameter */
+    iterationsNumb = 250; /* default iterations number */
+    tau = 0.0025; /* default marching step parameter */
+    if (nrhs >= 4) iterationsNumb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5) tau = (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+    /* dim_array holds number_of_dims entries only, so the third extent is read
+     * for volumes and fixed to 1 for images */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume; mxGetData is the accessor for non-double data */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    LLT_ROF_CPU_main(Input, Output, lambdaROF, lambdaLLT, iterationsNumb, tau, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
new file mode 100644
index 0000000..ec35b8b
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff.c
@@ -0,0 +1,89 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffusion_core.h"
+
+/* C-OMP implementation of linear and nonlinear diffusion with the regularisation model [1] (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL parameter]
+ * 5. tau - time-marching step for explicit scheme [OPTIONAL parameter]
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight [OPTIONAL parameter]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway to Diffusion_CPU_main: validates the call, applies the defaults of
+     * the optional arguments and allocates an output of the same size as the input. */
+    int number_of_dims, iter_numb, penaltytype;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+
+    /* the argument count is verified before any element of prhs is dereferenced */
+    if ((nrhs < 3) || (nrhs > 6)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* default iterations number */
+    tau = 0.025; /* default marching step parameter */
+    penaltytype = 1; /* Huber penalty by default */
+
+    if (nrhs >= 4) iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5) tau = (float) mxGetScalar(prhs[4]); /* marching step parameter */
+    if (nrhs == 6) {
+        char *penalty_type = mxArrayToString(prhs[5]); /* Huber, PM or Tukey 'Huber' is the default */
+        /* mxArrayToString yields NULL for a non-string argument, which must not reach strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0))) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if (strcmp(penalty_type, "Huber") == 0) penaltytype = 1; /* enable 'Huber' penalty */
+        if (strcmp(penalty_type, "PM") == 0) penaltytype = 2; /* enable Perona-Malik penalty */
+        if (strcmp(penalty_type, "Tukey") == 0) penaltytype = 3; /* enable Tukey Biweight penalty */
+        mxFree(penalty_type);
+    }
+
+    /* dim_array holds number_of_dims entries only: the third extent exists for volumes only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume; mxGetData is the accessor for non-double data */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    Diffusion_CPU_main(Input, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
new file mode 100644
index 0000000..9833392
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/NonlDiff_Inp.c
@@ -0,0 +1,103 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffusion_Inpaint_core.h"
+
+/* C-OMP implementation of linear and nonlinear diffusion [1,2] for inpainting task (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Image/volume to inpaint
+ * 2. Inpainting Mask of the same size as (1) in 'unsigned char' format (ones mark the region to inpaint, zeros belong to the data)
+ * 3. lambda - regularization parameter
+ * 4. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 5. Number of iterations, for explicit scheme >= 150 is recommended
+ * 6. tau - time-marching step for explicit scheme
+ * 7. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Inpainted image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway to Diffusion_Inpaint_CPU_main: validates the image/mask pair and the
+     * optional arguments, then allocates an output of the same size as the input. */
+    int number_of_dims, iter_numb, penaltytype, inpaint_elements;
+    mwSize dimX, dimY, dimZ, i, total;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+    unsigned char *Mask;
+
+    /* the argument count is verified before any element of prhs is dereferenced */
+    if ((nrhs < 4) || (nrhs > 7)) mexErrMsgTxt("At least 4 parameters is required, all parameters are: Image(2D/3D), Mask(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxUINT8_CLASS) {mexErrMsgTxt("The mask must be in uint8 precision");}
+
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /* equal rank guarantees that dim_array2 has as many entries as dim_array */
+    if ((int) mxGetNumberOfDimensions(prhs[1]) != number_of_dims) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    Mask = (unsigned char *) mxGetData(prhs[1]); /* MASK */
+    lambda = (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[3]); /* Edge-preserving parameter */
+    iter_numb = 300; /* default iterations number */
+    tau = 0.025; /* default marching step parameter */
+    penaltytype = 1; /* Huber penalty by default */
+
+    if (nrhs >= 5) iter_numb = (int) mxGetScalar(prhs[4]); /* iterations number */
+    if (nrhs >= 6) tau = (float) mxGetScalar(prhs[5]); /* marching step parameter */
+    if (nrhs == 7) {
+        char *penalty_type = mxArrayToString(prhs[6]); /* Huber, PM or Tukey 'Huber' is the default */
+        /* mxArrayToString yields NULL for a non-string argument, which must not reach strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0))) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if (strcmp(penalty_type, "Huber") == 0) penaltytype = 1; /* enable 'Huber' penalty */
+        if (strcmp(penalty_type, "PM") == 0) penaltytype = 2; /* enable Perona-Malik penalty */
+        if (strcmp(penalty_type, "Tukey") == 0) penaltytype = 3; /* enable Tukey Biweight penalty */
+        mxFree(penalty_type);
+    }
+
+    /* the third extent exists for volumes only; images are processed with dimZ = 1 */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+    if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || ((number_of_dims == 3) && (dimZ != dim_array2[2]))) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
+
+    /* output image/volume; mxGetData is the accessor for non-double data */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    /* mwSize index (no int overflow on large volumes); one marked element is enough */
+    inpaint_elements = 0; total = dimX*dimY*dimZ;
+    for (i=0; i<total; i++) if (Mask[i] == 1) { inpaint_elements = 1; break; }
+    if (inpaint_elements == 0) mexErrMsgTxt("The mask is full of zeros, nothing to inpaint");
+    Diffusion_Inpaint_CPU_main(Input, Mask, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c b/src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
new file mode 100644
index 0000000..b3f2c98
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/NonlocalMarching_Inpaint.c
@@ -0,0 +1,84 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "NonlocalMarching_Inpaint_core.h"
+
+/* C-OMP implementation of Nonlocal Vertical Marching inpainting method (2D case)
+ * The method is heuristic but computationally efficient (especially for larger images).
+ * It was developed specifically to smoothly inpaint horizontal or inclined missing data regions in sinograms.
+ * The method WILL NOT work satisfactorily if you have lengthy vertical stripes of missing data
+ *
+ * Input:
+ * 1. 2D image or sinogram [REQUIRED]
+ * 2. Mask of the same size as A in 'unsigned char' format (ones mark the region to inpaint, zeros belong to the data) [REQUIRED]
+ * 3. Linear increment to increase searching window size in iterations, values from 1-3 is a good choice [OPTIONAL, default 1]
+ * 4. Number of iterations [OPTIONAL, default - calculate based on the mask]
+ *
+ * Output:
+ * 1. Inpainted sinogram
+ * 2. updated mask
+ * Reference: TBA
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway to NonlocalMarching_Inpaint_main (2D only): validates the image/mask
+     * pair, applies the defaults of the optional arguments and allocates both outputs. */
+    int number_of_dims, iterations, SW_increment;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    mxArray *mask_out;
+    float *Input, *Output=NULL;
+    unsigned char *Mask, *Mask_upd=NULL;
+
+    /* the argument count is verified before any element of prhs is dereferenced */
+    if ((nrhs < 2) || (nrhs > 4)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Mask(2D/3D), Linear increment, Iterations number");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxUINT8_CLASS) {mexErrMsgTxt("The mask must be in uint8 precision");}
+
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if (number_of_dims != 2) mexErrMsgTxt("Currently 2D supported only");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    Mask = (unsigned char *) mxGetData(prhs[1]); /* MASK */
+    SW_increment = 1; /* default linear increment */
+    iterations = 0; /* default iterations number */
+    if (nrhs >= 3) SW_increment = (int) mxGetScalar(prhs[2]); /* linear increment */
+    if (nrhs == 4) iterations = (int) mxGetScalar(prhs[3]); /* iterations number */
+
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = 1; /*2D case*/
+    if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("Input image and the provided mask are of different dimensions!");
+
+    /* output arrays; mxGetData is the accessor for non-double data */
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* the updated mask is always computed, but plhs[1] is written only when a second output was requested */
+    mask_out = mxCreateNumericArray(2, dim_array, mxUINT8_CLASS, mxREAL);
+    Mask_upd = (unsigned char *) mxGetData(mask_out);
+    if (nlhs > 1) plhs[1] = mask_out;
+    NonlocalMarching_Inpaint_main(Input, Mask, Output, Mask_upd, SW_increment, iterations, 0, dimX, dimY, dimZ);
+    if (nlhs < 2) mxDestroyArray(mask_out); /* not returned to MATLAB, so released here */
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
new file mode 100644
index 0000000..014c0a0
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c
@@ -0,0 +1,88 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "matrix.h"
+#include "mex.h"
+#include "Nonlocal_TV_core.h"
+
+#define EPS 1.0000e-9
+
+/* Matlab wrapper for C-OMP implementation of non-local regulariser
+ * Weights and associated indices must be given as an input.
+ * Gauss-Seidel fixed point iteration requires ~ 3 iterations, so the main effort
+ * goes in pre-calculation of weights and selection of patches
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. AR_i - indices of i neighbours
+ * 3. AR_j - indices of j neighbours
+ * 4. AR_k - indices of k neighbours (0 - for 2D case)
+ * 5. Weights_ij(k) - associated weights
+ * 6. regularisation parameter
+ * 7. iterations number
+
+ * Output:
+ * 1. denoised image/volume
+ * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060.
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    /* MEX gateway to Nonlocal_TV_CPU_main: validates the seven inputs, derives the image
+     * extents and the neighbour count, and allocates an output of the input's size. */
+    long number_of_dims, dimX, dimY, dimZ;
+    int IterNumb, NumNeighb = 0;
+    unsigned short *H_i, *H_j, *H_k;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+    float *A_orig, *Output=NULL, *Weights, lambda;
+
+    /* all seven arguments are read below, so their presence and storage class are checked first */
+    if (nrhs != 7) mexErrMsgTxt("Seven inputs required: Image(2D/3D), AR_i, AR_j, AR_k, Weights, regularisation parameter, iterations number");
+    if ((mxGetClassID(prhs[0]) != mxSINGLE_CLASS) || (mxGetClassID(prhs[4]) != mxSINGLE_CLASS)) mexErrMsgTxt("The input image and the weights must be in a single precision");
+    if ((mxGetClassID(prhs[1]) != mxUINT16_CLASS) || (mxGetClassID(prhs[2]) != mxUINT16_CLASS)) mexErrMsgTxt("The neighbour indices must be in uint16 precision");
+
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+    number_of_dims = (long) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    if ((number_of_dims == 3) && (mxGetClassID(prhs[3]) != mxUINT16_CLASS)) mexErrMsgTxt("The neighbour indices must be in uint16 precision");
+
+    /*Handling Matlab input data*/
+    A_orig = (float *) mxGetData(prhs[0]); /* a 2D image or a set of 2D images (3D stack) */
+    H_i = (unsigned short *) mxGetData(prhs[1]); /* indices of i neighbours */
+    H_j = (unsigned short *) mxGetData(prhs[2]); /* indices of j neighbours */
+    H_k = (unsigned short *) mxGetData(prhs[3]); /* indices of k neighbours */
+    Weights = (float *) mxGetData(prhs[4]); /* weights for patches */
+    lambda = (float) mxGetScalar(prhs[5]); /* regularisation parameter */
+    IterNumb = (int) mxGetScalar(prhs[6]); /* the number of iterations */
+
+    /* extents are read through mwSize (not int) pointers; dimZ is passed as 0 for 2D input */
+    dimX = (long) dim_array[0]; dimY = (long) dim_array[1];
+    dimZ = (number_of_dims == 3) ? (long) dim_array[2] : 0;
+    /* neighbour count = extent following the image extents; absent when MATLAB dropped a trailing singleton */
+    NumNeighb = ((long) mxGetNumberOfDimensions(prhs[1]) > number_of_dims) ? (int) dim_array2[number_of_dims] : 1;
+    Output = (float *) mxGetData(plhs[0] = mxCreateNumericArray((mwSize) number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* run the main function here */
+    Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
new file mode 100644
index 0000000..f942539
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c
@@ -0,0 +1,92 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC and Diamond Light Source Ltd.
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ * Copyright 2018 Diamond Light Source Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "matrix.h"
+#include "mex.h"
+#include "PatchSelect_core.h"
+
+/* C-OMP implementation of non-local weight pre-calculation for non-local priors
+ * Weights and associated indices are stored into pre-allocated arrays and passed
+ * to the regulariser
+ *
+ *
+ * Input Parameters:
+ * 1. 2D/3D grayscale image/volume
+ * 2. Searching window (half-size of the main bigger searching window, e.g. 11)
+ * 3. Similarity window (half-size of the patch window, e.g. 2)
+ * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken)
+ * 5. noise-related parameter to calculate non-local weights
+ *
+ * Output [2D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. Weights_ij - associated weights
+ *
+ * Output [3D]:
+ * 1. AR_i - indices of i neighbours
+ * 2. AR_j - indices of j neighbours
+ * 3. AR_k - indices of k neighbours
+ * 4. Weights_ijk - associated weights
+ */
+/**************************************************/
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+{
+    /* MEX gateway to PatchSelect_CPU_main: validates the five inputs and allocates the index
+     * arrays (uint16) and the weights (single), returned as 3 outputs for 2D and 4 for 3D. */
+    int number_of_dims, SearchWindow, SimilarWin, NumNeighb, k, n_out;
+    mwSize dimX, dimY, dimZ;
+    unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL;
+    const mwSize *dim_array;
+    float *A, *Weights = NULL, h;
+    mwSize dim_out[4]; /* extents of the outputs: those of the input followed by NumNeighb */
+    mxArray *out[4];
+
+    /* all five arguments are read below, so their presence is checked first */
+    if (nrhs != 5) mexErrMsgTxt("Five inputs required: Image(2D/3D), searching window, similarity window, number of neighbours, noise-related parameter");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    A = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */
+    SearchWindow = (int) mxGetScalar(prhs[1]); /* Large Searching window */
+    SimilarWin = (int) mxGetScalar(prhs[2]); /* Similarity window (patch-search)*/
+    NumNeighb = (int) mxGetScalar(prhs[3]); /* the total number of neighbours to take */
+    h = (float) mxGetScalar(prhs[4]); /* NLM parameter */
+    if (NumNeighb < 1) mexErrMsgTxt("The number of neighbours must be positive");
+
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = (number_of_dims == 3) ? dim_array[2] : 0; /* dimZ is passed as 0 for 2D */
+    dim_out[0] = dimX; dim_out[1] = dimY; dim_out[number_of_dims] = (mwSize) NumNeighb;
+    if (number_of_dims == 3) dim_out[2] = dimZ;
+
+    /* the outputs have one extent more than the input, which also equals their count;
+     * the last output holds the weights (single), the preceding ones the indices (uint16) */
+    n_out = number_of_dims + 1;
+    for (k = 0; k < n_out; k++) out[k] = mxCreateNumericArray((mwSize) n_out, dim_out, (k == n_out - 1) ? mxSINGLE_CLASS : mxUINT16_CLASS, mxREAL);
+    H_i = (unsigned short *) mxGetData(out[0]); H_j = (unsigned short *) mxGetData(out[1]);
+    if (number_of_dims == 3) H_k = (unsigned short *) mxGetData(out[2]);
+    Weights = (float *) mxGetData(out[n_out - 1]);
+    /* only plhs[0] and the outputs actually requested by the caller are written */
+    for (k = 0; k < n_out; k++) if ((k == 0) || (k < nlhs)) plhs[k] = out[k];
+    PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h, 0);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c b/src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
new file mode 100644
index 0000000..55ef2b1
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/ROF_TV.c
@@ -0,0 +1,77 @@
+
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "ROF_TV_core.h"
+
+/* ROF-TV denoising/regularization model [1] (2D/3D case)
+ * (MEX wrapper for MATLAB)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway to TV_ROF_CPU_main: validates the four required inputs and
+     * allocates an output of the same size as the input image/volume. */
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array_i;
+    float *Input, *Output=NULL, lambda, tau;
+
+    /* the argument count is verified before any element of prhs is dereferenced */
+    if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number, marching step constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array_i = mxGetDimensions(prhs[0]);
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter_numb = (int) mxGetScalar(prhs[2]); /* iterations number */
+    tau = (float) mxGetScalar(prhs[3]); /* marching step parameter */
+
+    /* dim_array_i holds number_of_dims entries only: the third extent exists for volumes only */
+    dimX = dim_array_i[0]; dimY = dim_array_i[1];
+    dimZ = (number_of_dims == 3) ? dim_array_i[2] : 1;
+
+    /* output image/volume; mxGetData is the accessor for non-double data */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array_i, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+
+    TV_ROF_CPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/SB_TV.c b/src/Matlab/mex_compile/regularisers_CPU/SB_TV.c
new file mode 100644
index 0000000..8636322
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/SB_TV.c
@@ -0,0 +1,91 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "SB_TV_core.h"
+
+/* C-OMP implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    /* MEX gateway to SB_TV_CPU_main: validates the call, applies the defaults of the
+     * optional arguments and allocates an output of the same size as the input. */
+    int number_of_dims, iter, methTV, printswitch;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /* the argument count is verified before any element of prhs is dereferenced */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = (int) mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 100; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0; /* default isotropic TV penalty */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    if (nrhs >= 3) iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5) {
+        char *penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-string argument, which must not reach strcmp */
+        if ((penalty_type == NULL) || ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0))) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs == 6) {
+        printswitch = (int) mxGetScalar(prhs[5]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /* dim_array holds number_of_dims entries only: the third extent exists for volumes only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume; mxGetData is the accessor for non-double data */
+    plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL);
+    Output = (float *) mxGetData(plhs[0]);
+    /* running the function */
+    SB_TV_CPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/TGV.c b/src/Matlab/mex_compile/regularisers_CPU/TGV.c
new file mode 100644
index 0000000..aa4eed4
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/TGV.c
@@ -0,0 +1,83 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "TGV_core.h"
+
+/* C-OMP implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+/* MEX gateway for the TGV-L2 denoiser: checks the MATLAB arguments, applies the
+ * defaults for the optional ones, allocates the output and calls TGV_main.
+ *
+ * prhs[0] - noisy image/volume, single precision, 2D or 3D
+ * prhs[1] - lambda, regularisation parameter
+ * prhs[2] - alpha1, first-order term weight (optional, default 1.0)
+ * prhs[3] - alpha0, second-order term weight (optional, default 0.5)
+ * prhs[4] - iterations number (optional, default 300)
+ * prhs[5] - Lipshitz constant (optional, default 12)
+ * plhs[0] - filtered output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+
+    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified before any prhs[] element is touched */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, alpha1, alpha0, iterations number, Lipshitz Constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* only 2D and 3D inputs get an output array allocated below */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    Input = (float *) mxGetData(prhs[0]); /*noisy image/volume */
+    lambda = (float) mxGetScalar(prhs[1]); /* regularisation parameter */
+    alpha1 = 1.0f; /* parameter to control the first-order term */
+    alpha0 = 0.5f; /* parameter to control the second-order term */
+    iter = 300; /* Iterations number */
+    L2 = 12.0f; /* Lipshitz constant */
+
+    /* nrhs <= 6 is guaranteed above, so ">=" selects every supplied optional argument */
+    if (nrhs >= 3) alpha1 = (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */
+    if (nrhs >= 4) alpha0 = (float) mxGetScalar(prhs[3]); /* parameter to control the second-order term */
+    if (nrhs >= 5) iter = (int) mxGetScalar(prhs[4]); /* Iterations number */
+    if (nrhs == 6) L2 = (float) mxGetScalar(prhs[5]); /* Lipshitz constant */
+
+    /*Handling Matlab output data*/
+    /* dim_array holds only number_of_dims entries: index 2 exists for 3D input only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    TGV_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_CPU/TNV.c b/src/Matlab/mex_compile/regularisers_CPU/TNV.c
new file mode 100644
index 0000000..acea75d
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/TNV.c
@@ -0,0 +1,74 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TNV_core.h"
+/*
+ * C-OMP implementation of Total Nuclear Variation regularisation model (2D + channels) [1]
+ * The code is modified from the implementation by Joan Duran <joan.duran@uib.es> see
+ * "denoisingPDHG_ipol.cpp" in Joans Collaborative Total Variation package
+ *
+ * Input Parameters:
+ * 1. Noisy volume of 2D + channel dimension, i.e. 3D volume
+ * 2. lambda - regularisation parameter
+ * 3. Number of iterations [OPTIONAL parameter]
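+ * 4. epsilon - tolerance constant [OPTIONAL parameter]
+ * 5. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+ *    (NOTE: this MEX wrapper rejects more than 4 input arguments, so the print switch cannot be passed here)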
+ *
+ * Output:
+ * 1. Filtered/regularized image
+ *
+ * [1]. Duran, J., Moeller, M., Sbert, C. and Cremers, D., 2016. Collaborative total variation: a general framework for vectorial TV models. SIAM Journal on Imaging Sciences, 9(1), pp.116-151.
+ */
+/* MEX gateway for the Total Nuclear Variation regulariser: checks the MATLAB
+ * arguments, applies defaults, allocates the output and calls TNV_CPU_main.
+ *
+ * prhs[0] - noisy 3D array [X,Y,Channels], single precision
+ * prhs[1] - lambda, regularisation parameter
+ * prhs[2] - iterations number (optional, default 1000)
+ * prhs[3] - tolerance constant (optional, default 1e-5)
+ * plhs[0] - filtered output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified before any prhs[] element is touched */
+    if ((nrhs < 2) || (nrhs > 4)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D + channels), Regularisation parameter, iterations number, tolerance");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* reject everything but 3D up front: dim_array[2] does not exist for 2D input,
+       and higher-dimensional input would otherwise return without assigning plhs[0] */
+    if (number_of_dims != 3) mexErrMsgTxt("The input must be 3D: [X,Y,Channels]");
+
+    Input = (float *) mxGetData(prhs[0]); /* noisy sequence of channels (2D + channels) */
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 1000; /* default iterations number */
+    epsil = 1.00e-05; /* default tolerance constant */
+
+    if (nrhs >= 3) iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs == 4) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2];
+
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL));
+    /* running the function */
+    TNV_CPU_main(Input, Output, lambda, iter, epsil, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_CPU/TV_energy.c b/src/Matlab/mex_compile/regularisers_CPU/TV_energy.c
new file mode 100644
index 0000000..d457f46
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_CPU/TV_energy.c
@@ -0,0 +1,72 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "utils.h"
+/*
+ * Function to calculate TV energy value with respect to the denoising variational problem
+ *
+ * Input:
+ * 1. Denoised Image/volume
+ * 2. Original (noisy) Image/volume
+ * 3. lambda - regularisation parameter
+ * 4. type - type of energy (integer, passed through to TV_energy2D/TV_energy3D)
+ *
+ * Output:
+ * 1. Energy function value
+ *
+ */
+/* MEX gateway that evaluates the TV energy of a denoised image/volume against
+ * the original one by calling TV_energy2D or TV_energy3D.
+ *
+ * prhs[0] - denoised image/volume, single precision, 2D or 3D
+ * prhs[1] - original (noisy) image/volume, single precision, same size as prhs[0]
+ * prhs[2] - lambda, regularisation parameter
+ * prhs[3] - type of energy (integer)
+ * plhs[0] - 1x1 single-precision energy value
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, type, d;
+
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    const mwSize *dim_array0;
+    float *Input, *Input0, *funcvalA, lambda;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified before any prhs[] element is touched */
+    if ((nrhs != 4)) mexErrMsgTxt("4 inputs: Two images or volumes of the same size required, estimated and the original (noisy), regularisation parameter, type");
+
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array0 = mxGetDimensions(prhs[1]);
+
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The inputs must be 2D images or 3D volumes");
+
+    /* both arrays are traversed with the extents of prhs[0], so they must match exactly */
+    if (mxGetNumberOfDimensions(prhs[1]) != (mwSize) number_of_dims) mexErrMsgTxt("The input images/volumes must be of the same size");
+    for (d = 0; d < number_of_dims; d++) {
+        if (dim_array[d] != dim_array0[d]) mexErrMsgTxt("The input images/volumes must be of the same size");
+    }
+
+    Input = (float *) mxGetData(prhs[0]); /* Denoised Image/volume */
+    Input0 = (float *) mxGetData(prhs[1]); /* Original (noisy) Image/volume */
+    lambda = (float) mxGetScalar(prhs[2]); /* regularisation parameter */
+    type = (int) mxGetScalar(prhs[3]); /* type of energy */
+
+    /*output energy function value */
+    plhs[0] = mxCreateNumericMatrix(1, 1, mxSINGLE_CLASS, mxREAL);
+    funcvalA = (float *) mxGetData(plhs[0]);
+
+    dimX = dim_array[0]; dimY = dim_array[1];
+
+    if (number_of_dims == 2) {
+        TV_energy2D(Input, Input0, funcvalA, lambda, type, dimX, dimY);
+    }
+    if (number_of_dims == 3) {
+        dimZ = dim_array[2]; /* the third extent exists only for 3D input */
+        TV_energy3D(Input, Input0, funcvalA, lambda, type, dimX, dimY, dimZ);
+    }
+}
diff --git a/src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
new file mode 100644
index 0000000..0cc042b
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/Diffusion_4thO_GPU.cpp
@@ -0,0 +1,77 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "Diffus_4thO_GPU_core.h"
+
+/* CUDA implementation of fourth-order diffusion scheme [1] for piecewise-smooth recovery (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Edge-preserving parameter (sigma) [REQUIRED]
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended [OPTIONAL, default 300]
+ * 5. tau - time-marching step for the explicit scheme [OPTIONAL, default 0.01]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Hajiaboli, M.R., 2011. An anisotropic fourth-order diffusion filter for image noise removal. International Journal of Computer Vision, 92(2), pp.177-191.
+ */
+
+/* MEX gateway for the CUDA fourth-order diffusion filter: checks the MATLAB
+ * arguments, applies defaults, allocates the output and calls Diffus4th_GPU_main.
+ *
+ * prhs[0] - noisy image/volume, single precision, 2D or 3D
+ * prhs[1] - lambda, regularisation parameter
+ * prhs[2] - sigma, edge-preserving parameter
+ * prhs[3] - iterations number (optional, default 300)
+ * prhs[4] - tau, time-marching step (optional, default 0.01)
+ * plhs[0] - regularised output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, tau, sigma;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified first: prhs[1] and prhs[2] are read unconditionally below */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* only 2D and 3D inputs get an output array allocated below */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    Input = (float *) mxGetData(prhs[0]);
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* iterations number */
+    tau = 0.01; /* marching step parameter */
+
+    if (nrhs >= 4) iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5) tau = (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    /* dim_array holds only number_of_dims entries: index 2 exists for 3D input only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume */
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    Diffus4th_GPU_main(Input, Output, lambda, sigma, iter_numb, tau, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
new file mode 100644
index 0000000..c174e75
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/FGP_TV_GPU.cpp
@@ -0,0 +1,97 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_FGP_GPU_core.h"
+
+/* GPU (CUDA) implementation of FGP-TV [1] denoising/regularization model (2D/3D case)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambdaPar - regularization parameter
+ * 3. Number of iterations
+ * 4. epsilon: tolerance constant
+ * 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
+ * 6. nonneg: nonnegativity switch (0 is OFF by default)
+ * 7. print information: 0 (off) or 1 (on)
+ *
+ * Output:
+ * [1] Filtered/regularized image
+ *
+ * This function is based on the Matlab's code and paper by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ */
+
+/* MEX gateway for the CUDA FGP-TV denoiser: checks the MATLAB arguments,
+ * applies defaults, allocates the output and calls TV_FGP_GPU_main.
+ *
+ * prhs[0] - noisy image/volume, single precision, 2D or 3D
+ * prhs[1] - lambda, regularisation parameter
+ * prhs[2] - iterations number (optional, default 300)
+ * prhs[3] - tolerance constant (optional, default 0.0001)
+ * prhs[4] - penalty type string 'iso' or 'l1' (optional, default 'iso')
+ * prhs[5] - nonnegativity switch 0/1 (optional, default 0)
+ * prhs[6] - print switch 0/1 (optional, default 0)
+ * plhs[0] - filtered output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter, methTV, printswitch, nonneg;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified before any prhs[] element is touched */
+    if ((nrhs < 2) || (nrhs > 7)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter. The full list of parameters: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* only 2D and 3D inputs get an output array allocated below */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0; /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /* nrhs <= 7 is guaranteed above, so ">=" selects every supplied optional argument */
+    if (nrhs >= 3) iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5) {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; strcmp must not see it */
+        if (penalty_type == NULL) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        /* NOTE(review): strcmp needs <string.h>; confirm one of the included headers provides it */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 6) {
+        nonneg = (int) mxGetScalar(prhs[5]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 7) {
+        printswitch = (int) mxGetScalar(prhs[6]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /*Handling Matlab output data*/
+    /* dim_array holds only number_of_dims entries: index 2 exists for 3D input only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    TV_FGP_GPU_main(Input, Output, lambda, iter, epsil, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
new file mode 100644
index 0000000..3f5a4b3
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/FGP_dTV_GPU.cpp
@@ -0,0 +1,113 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "dTV_FGP_GPU_core.h"
+
+/* CUDA implementation of FGP-dTV [1,2] denoising/regularization model (2D/3D case)
+ * which employs structural similarity of the level sets of two images/volumes, see [1,2]
+ * The current implementation updates image 1 while image 2 is being fixed.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. Additional reference image/volume of the same dimensions as (1) [REQUIRED]
+ * 3. lambdaPar - regularization parameter [REQUIRED]
+ * 4. Number of iterations [OPTIONAL]
+ * 5. epsilon: tolerance constant [OPTIONAL]
+ * 6. eta: smoothing constant to calculate gradient of the reference [OPTIONAL]
+ * 7. TV-type: methodTV - 'iso' (0) or 'l1' (1) [OPTIONAL]
+ * 8. nonneg: nonnegativity switch (0 is OFF by default) [OPTIONAL]
+ * 9. print information: 0 (off) or 1 (on) [OPTIONAL]
+ *
+ * Output:
+ * [1] Filtered/regularized image/volume
+ *
+ * This function is based on the Matlab's codes and papers by
+ * [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
+ * [2] M. J. Ehrhardt and M. M. Betcke, Multi-Contrast MRI Reconstruction with Structure-Guided Total Variation, SIAM Journal on Imaging Sciences 9(3), pp. 1084–1106
+ */
+/* MEX gateway for the CUDA FGP-dTV denoiser: checks the MATLAB arguments,
+ * applies defaults, allocates the output and calls dTV_FGP_GPU_main.
+ *
+ * prhs[0] - noisy image/volume, single precision, 2D or 3D
+ * prhs[1] - reference image/volume, single precision, same size as prhs[0]
+ * prhs[2] - lambda, regularisation parameter
+ * prhs[3] - iterations number (optional, default 300)
+ * prhs[4] - tolerance constant (optional, default 0.0001)
+ * prhs[5] - eta, smoothing constant (optional, default 0.01)
+ * prhs[6] - penalty type string 'iso' or 'l1' (optional, default 'iso')
+ * prhs[7] - nonnegativity switch 0/1 (optional, default 0)
+ * prhs[8] - print switch 0/1 (optional, default 0)
+ * plhs[0] - filtered output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter, methTV, printswitch, nonneg;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    const mwSize *dim_array2;
+
+    float *Input, *InputRef, *Output=NULL, lambda, epsil, eta;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified before any prhs[] element is touched */
+    if ((nrhs < 3) || (nrhs > 9)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Reference(2D/3D), Regularization parameter, iterations number, tolerance, smoothing constant, penalty type ('iso' or 'l1'), nonnegativity switch, print switch");
+
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+    if (mxGetClassID(prhs[1]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    dim_array2 = mxGetDimensions(prhs[1]);
+
+    /* only 2D and 3D inputs get an output array allocated below */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+    /* the reference must have as many dimensions as the input, otherwise
+       dim_array2[2] below would be read past the end of its dimension array */
+    if (mxGetNumberOfDimensions(prhs[1]) != (mwSize) number_of_dims) mexErrMsgTxt("The input images have different dimensionalities");
+
+    Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    InputRef = (float *) mxGetData(prhs[1]); /* reference image (2D/3D) */
+    lambda = (float) mxGetScalar(prhs[2]); /* regularization parameter */
+    iter = 300; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    eta = 0.01; /* default smoothing constant */
+    methTV = 0; /* default isotropic TV penalty */
+    nonneg = 0; /* default nonnegativity switch, off - 0 */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /*Handling Matlab output data*/
+    /* dim_array holds only number_of_dims entries: index 2 exists for 3D input only */
+    dimX = dim_array[0]; dimY = dim_array[1]; dimZ = 1;
+    if (number_of_dims == 2) { if ((dimX != dim_array2[0]) || (dimY != dim_array2[1])) mexErrMsgTxt("The input images have different dimensionalities");}
+    if (number_of_dims == 3) {
+        dimZ = dim_array[2];
+        if ((dimX != dim_array2[0]) || (dimY != dim_array2[1]) || (dimZ != dim_array2[2])) mexErrMsgTxt("The input volumes have different dimensionalities");
+    }
+
+    /* nrhs <= 9 is guaranteed above, so ">=" selects every supplied optional argument */
+    if (nrhs >= 4) iter = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5) epsil = (float) mxGetScalar(prhs[4]); /* tolerance constant */
+    if (nrhs >= 6) {
+        eta = (float) mxGetScalar(prhs[5]); /* smoothing constant for the gradient of InputRef */
+    }
+    if (nrhs >= 7) {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[6]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; strcmp must not see it */
+        if (penalty_type == NULL) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        /* NOTE(review): strcmp needs <string.h>; confirm one of the included headers provides it */
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs >= 8) {
+        nonneg = (int) mxGetScalar(prhs[7]);
+        if ((nonneg != 0) && (nonneg != 1)) mexErrMsgTxt("Nonnegativity constraint can be enabled by choosing 1 or off - 0");
+    }
+    if (nrhs == 9) {
+        printswitch = (int) mxGetScalar(prhs[8]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    dTV_FGP_GPU_main(Input, InputRef, Output, lambda, iter, epsil, eta, methTV, nonneg, printswitch, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
new file mode 100644
index 0000000..e8da4ce
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/LLT_ROF_GPU.cpp
@@ -0,0 +1,83 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "LLT_ROF_GPU_core.h"
+
+/* CUDA implementation of Lysaker, Lundervold and Tai (LLT) model [1] combined with Rudin-Osher-Fatemi [2] TV regularisation penalty.
+*
+* This penalty can deliver visually pleasant piecewise-smooth recovery if regularisation parameters are selected well.
+* The rule of thumb for selection is to start with lambdaLLT = 0 (just the ROF-TV model) and then proceed to increase
+* lambdaLLT starting with smaller values.
+*
+* Input Parameters:
+* 1. U0 - original noisy image/volume
+* 2. lambdaROF - ROF-related regularisation parameter
+* 3. lambdaLLT - LLT-related regularisation parameter
+* 4. iter - iterations number (for both models) [OPTIONAL, default 250]
+* 5. tau - time-marching step [OPTIONAL, default 0.0025]
+*
+* Output:
+* Filtered/regularised image
+*
+* References:
+* [1] Lysaker, M., Lundervold, A. and Tai, X.C., 2003. Noise removal using fourth-order partial differential equation with applications to medical magnetic resonance images in space and time. IEEE Transactions on image processing, 12(12), pp.1579-1590.
+* [2] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+*/
+
+/* MEX gateway for the CUDA LLT-ROF regulariser: checks the MATLAB arguments,
+ * applies defaults, allocates the output and calls LLT_ROF_GPU_main.
+ *
+ * prhs[0] - noisy image/volume, single precision, 2D or 3D
+ * prhs[1] - lambdaROF, ROF-related regularisation parameter
+ * prhs[2] - lambdaLLT, LLT-related regularisation parameter
+ * prhs[3] - iterations number (optional, default 250)
+ * prhs[4] - tau, time-marching step (optional, default 0.0025)
+ * plhs[0] - regularised output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iterationsNumb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+
+    float *Input, *Output=NULL, lambdaROF, lambdaLLT, tau;
+
+    /* the argument count is verified before any prhs[] element is touched */
+    if ((nrhs < 3) || (nrhs > 5)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter (ROF), Regularisation parameter (LLT), iterations number, time-marching parameter");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* only 2D and 3D inputs get an output array allocated below */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    lambdaROF = (float) mxGetScalar(prhs[1]); /* ROF regularization parameter */
+    lambdaLLT = (float) mxGetScalar(prhs[2]); /* LLT regularization parameter */
+    iterationsNumb = 250; /* default iterations number */
+    tau = 0.0025; /* default marching step parameter */
+
+    if (nrhs >= 4) iterationsNumb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs == 5) tau = (float) mxGetScalar(prhs[4]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    /* dim_array holds only number_of_dims entries: index 2 exists for 3D input only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume */
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    LLT_ROF_GPU_main(Input, Output, lambdaROF, lambdaLLT, iterationsNumb, tau, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
new file mode 100644
index 0000000..1cd0cdc
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/NonlDiff_GPU.cpp
@@ -0,0 +1,92 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include <stdio.h>
+#include <string.h>
+#include "NonlDiff_GPU_core.h"
+
+/* CUDA implementation of linear and nonlinear diffusion with the regularisation model [1,2] (2D/3D case)
+ * The minimisation is performed using explicit scheme.
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume
+ * 2. lambda - regularization parameter
+ * 3. Edge-preserving parameter (sigma), when sigma equals to zero nonlinear diffusion -> linear diffusion
+ * 4. Number of iterations, for explicit scheme >= 150 is recommended
+ * 5. tau - time-marching step for explicit scheme
+ * 6. Penalty type: 1 - Huber, 2 - Perona-Malik, 3 - Tukey Biweight
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Perona, P. and Malik, J., 1990. Scale-space and edge detection using anisotropic diffusion. IEEE Transactions on pattern analysis and machine intelligence, 12(7), pp.629-639.
+ * [2] Black, M.J., Sapiro, G., Marimont, D.H. and Heeger, D., 1998. Robust anisotropic diffusion. IEEE Transactions on image processing, 7(3), pp.421-432.
+ */
+
+/* MEX gateway for the CUDA linear/nonlinear diffusion filter: checks the MATLAB
+ * arguments, applies defaults, allocates the output and calls NonlDiff_GPU_main.
+ *
+ * prhs[0] - noisy image/volume, single precision, 2D or 3D
+ * prhs[1] - lambda, regularisation parameter
+ * prhs[2] - sigma, edge-preserving parameter
+ * prhs[3] - iterations number (optional, default 300)
+ * prhs[4] - tau, time-marching step (optional, default 0.025)
+ * prhs[5] - penalty string 'Huber', 'PM' or 'Tukey' (optional, default 'Huber')
+ * plhs[0] - regularised output with the same dimensions as prhs[0]
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter_numb, penaltytype;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+
+    float *Input, *Output=NULL, lambda, tau, sigma;
+
+    /*Handling Matlab input data*/
+    /* the argument count is verified first: prhs[1] and prhs[2] are read unconditionally below */
+    if ((nrhs < 3) || (nrhs > 6)) mexErrMsgTxt("At least 3 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, Edge-preserving parameter, iterations number, time-marching constant, penalty type - Huber, PM or Tukey");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* only 2D and 3D inputs get an output array allocated below */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    Input = (float *) mxGetData(prhs[0]);
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    sigma = (float) mxGetScalar(prhs[2]); /* Edge-preserving parameter */
+    iter_numb = 300; /* iterations number */
+    tau = 0.025; /* marching step parameter */
+    penaltytype = 1; /* Huber penalty by default */
+
+    /* nrhs <= 6 is guaranteed above, so ">=" selects every supplied optional argument */
+    if (nrhs >= 4) iter_numb = (int) mxGetScalar(prhs[3]); /* iterations number */
+    if (nrhs >= 5) tau = (float) mxGetScalar(prhs[4]); /* marching step parameter */
+    if (nrhs == 6) {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[5]); /* Huber, PM or Tukey 'Huber' is the default */
+        /* mxArrayToString yields NULL for a non-char argument; strcmp must not see it */
+        if (penalty_type == NULL) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if ((strcmp(penalty_type, "Huber") != 0) && (strcmp(penalty_type, "PM") != 0) && (strcmp(penalty_type, "Tukey") != 0)) mexErrMsgTxt("Choose penalty: 'Huber', 'PM' or 'Tukey',");
+        if (strcmp(penalty_type, "Huber") == 0) penaltytype = 1; /* enable 'Huber' penalty */
+        if (strcmp(penalty_type, "PM") == 0) penaltytype = 2; /* enable Perona-Malik penalty */
+        if (strcmp(penalty_type, "Tukey") == 0) penaltytype = 3; /* enable Tukey Biweight penalty */
+        mxFree(penalty_type);
+    }
+
+    /*Handling Matlab output data*/
+    /* dim_array holds only number_of_dims entries: index 2 exists for 3D input only */
+    dimX = dim_array[0]; dimY = dim_array[1];
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume */
+    Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    NonlDiff_GPU_main(Input, Output, lambda, sigma, iter_numb, tau, penaltytype, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
new file mode 100644
index 0000000..bd01d55
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/ROF_TV_GPU.cpp
@@ -0,0 +1,74 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_ROF_GPU_core.h"
+
+/* ROF-TV denoising/regularization model [1] (2D/3D case)
+ * (MEX wrapper for MATLAB)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume [REQUIRED]
+ * 2. lambda - regularization parameter [REQUIRED]
+ * 3. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
+ * 4. tau - marching step for explicit scheme, ~1 is recommended [REQUIRED]
+ *
+ * Output:
+ * [1] Regularized image/volume
+ *
+ * This function is based on the paper by
+ * [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
+ *
+ * D. Kazantsev, 2016-18
+ */
+/* MEX gateway for ROF_TV_GPU: validates the MATLAB arguments, allocates the
+ * output array with the same dimensions as the input and calls TV_ROF_GPU_main.
+ *
+ * prhs[0] - single-precision 2D image or 3D volume
+ * prhs[1] - lambda, regularization parameter
+ * prhs[2] - number of iterations
+ * prhs[3] - tau, marching step
+ * plhs[0] - regularized image/volume (single precision)
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter_numb;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+
+    float *Input, *Output=NULL, lambda, tau;
+
+    /* the argument count must be validated before any prhs[] element is read */
+    if(nrhs != 4) mexErrMsgTxt("Four inputs reqired: Image(2D,3D), regularization parameter, iterations number, marching step constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    dim_array = mxGetDimensions(prhs[0]);
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    /* anything else would leave Output unallocated (NULL) for the solver */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    /*Handling Matlab input data*/
+    Input = (float *) mxGetData(prhs[0]);
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter_numb = (int) mxGetScalar(prhs[2]); /* iterations number */
+    tau = (float) mxGetScalar(prhs[3]); /* marching step parameter */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    /* dim_array only holds number_of_dims entries: never read [2] in the 2D case */
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* output image/volume; mxGetData is the accessor for non-double arrays */
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    TV_ROF_GPU_main(Input, Output, lambda, iter_numb, tau, dimX, dimY, dimZ);
+} \ No newline at end of file
diff --git a/src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
new file mode 100644
index 0000000..9d1328f
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/SB_TV_GPU.cpp
@@ -0,0 +1,91 @@
+/*
+ * This work is part of the Core Imaging Library developed by
+ * Visual Analytics and Imaging System Group of the Science Technology
+ * Facilities Council, STFC
+ *
+ * Copyright 2017 Daniil Kazantsev
+ * Copyright 2017 Srikanth Nagella, Edoardo Pasca
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "matrix.h"
+#include "mex.h"
+#include "TV_SB_GPU_core.h"
+
+/* CUDA mex-file for implementation of Split Bregman - TV denoising-regularisation model (2D/3D) [1]
+*
+* Input Parameters:
+* 1. Noisy image/volume
+* 2. lambda - regularisation parameter
+* 3. Number of iterations [OPTIONAL parameter]
+* 4. epsilon - tolerance constant [OPTIONAL parameter]
+* 5. TV-type: 'iso' or 'l1' [OPTIONAL parameter]
+* 6. print information: 0 (off) or 1 (on) [OPTIONAL parameter]
+*
+* Output:
+* 1. Filtered/regularized image
+*
+* This function is based on the Matlab's code and paper by
+* [1]. Goldstein, T. and Osher, S., 2009. The split Bregman method for L1-regularized problems. SIAM journal on imaging sciences, 2(2), pp.323-343.
+*/
+
+/* MEX gateway for SB_TV_GPU: parses the MATLAB arguments (filling defaults for
+ * the optional ones), allocates the output array with the same dimensions as
+ * the input and calls TV_SB_GPU_main.
+ *
+ * prhs[0] - single-precision 2D image or 3D volume
+ * prhs[1] - lambda, regularization parameter
+ * prhs[2] - iterations number            (optional, default 100)
+ * prhs[3] - tolerance constant           (optional, default 0.0001)
+ * prhs[4] - TV type, 'iso' or 'l1'       (optional, default 'iso')
+ * prhs[5] - print switch, 0 or 1         (optional, default 0)
+ * plhs[0] - filtered image/volume (single precision)
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter, methTV, printswitch;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+
+    float *Input, *Output=NULL, lambda, epsil;
+
+    /*Handling Matlab input data*/
+    /* the argument count must be validated before any prhs[] element is read */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularization parameter, iterations number, tolerance, penalty type ('iso' or 'l1'), print switch");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* anything else would leave Output unallocated (NULL) for the solver */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda = (float) mxGetScalar(prhs[1]); /* regularization parameter */
+    iter = 100; /* default iterations number */
+    epsil = 0.0001; /* default tolerance constant */
+    methTV = 0; /* default isotropic TV penalty */
+    printswitch = 0; /*default print is switched, off - 0 */
+
+    /* nrhs is already known to be in [2, 6], so ">=" covers the optional tail */
+    if (nrhs >= 3) iter = (int) mxGetScalar(prhs[2]); /* iterations number */
+    if (nrhs >= 4) epsil = (float) mxGetScalar(prhs[3]); /* tolerance constant */
+    if (nrhs >= 5) {
+        char *penalty_type;
+        penalty_type = mxArrayToString(prhs[4]); /* choosing TV penalty: 'iso' or 'l1', 'iso' is the default */
+        /* mxArrayToString returns NULL for a non-char argument: reject it before strcmp */
+        if (penalty_type == NULL) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if ((strcmp(penalty_type, "l1") != 0) && (strcmp(penalty_type, "iso") != 0)) mexErrMsgTxt("Choose TV type: 'iso' or 'l1',");
+        if (strcmp(penalty_type, "l1") == 0) methTV = 1; /* enable 'l1' penalty */
+        mxFree(penalty_type);
+    }
+    if (nrhs == 6) {
+        printswitch = (int) mxGetScalar(prhs[5]);
+        if ((printswitch != 0) && (printswitch != 1)) mexErrMsgTxt("Print can be enabled by choosing 1 or off - 0");
+    }
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    /* dim_array only holds number_of_dims entries: never read [2] in the 2D case */
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* mxGetData is the accessor for non-double arrays */
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    TV_SB_GPU_main(Input, Output, lambda, iter, epsil, methTV, printswitch, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp b/src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
new file mode 100644
index 0000000..1173282
--- /dev/null
+++ b/src/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp
@@ -0,0 +1,81 @@
+/*
+This work is part of the Core Imaging Library developed by
+Visual Analytics and Imaging System Group of the Science Technology
+Facilities Council, STFC
+
+Copyright 2017 Daniil Kazantsev
+Copyright 2017 Srikanth Nagella, Edoardo Pasca
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "mex.h"
+#include "TGV_GPU_core.h"
+
+/* CUDA implementation of Primal-Dual denoising method for
+ * Total Generalized Variation (TGV)-L2 model [1] (2D/3D)
+ *
+ * Input Parameters:
+ * 1. Noisy image/volume (2D/3D)
+ * 2. lambda - regularisation parameter
+ * 3. parameter to control the first-order term (alpha1)
+ * 4. parameter to control the second-order term (alpha0)
+ * 5. Number of Chambolle-Pock (Primal-Dual) iterations
+ * 6. Lipschitz constant (default is 12)
+ *
+ * Output:
+ * Filtered/regularised image
+ *
+ * References:
+ * [1] K. Bredies "Total Generalized Variation"
+ */
+
+/* MEX gateway for TGV_GPU: parses the MATLAB arguments (filling defaults for
+ * the optional ones), allocates the output array with the same dimensions as
+ * the input and calls TGV_GPU_main.
+ *
+ * prhs[0] - single-precision 2D image or 3D volume
+ * prhs[1] - lambda, regularisation parameter
+ * prhs[2] - alpha1, first-order term weight   (optional, default 1.0)
+ * prhs[3] - alpha0, second-order term weight  (optional, default 2.0)
+ * prhs[4] - iterations number                 (optional, default 500)
+ * prhs[5] - Lipschitz constant                (optional, default 12.0)
+ * plhs[0] - filtered image/volume (single precision)
+ */
+void mexFunction(
+        int nlhs, mxArray *plhs[],
+        int nrhs, const mxArray *prhs[])
+
+{
+    int number_of_dims, iter;
+    mwSize dimX, dimY, dimZ;
+    const mwSize *dim_array;
+    float *Input, *Output=NULL, lambda, alpha0, alpha1, L2;
+
+    /*Handling Matlab input data*/
+    /* the argument count must be validated before any prhs[] element is read;
+     * the message lists the parameters in the order they are actually parsed */
+    if ((nrhs < 2) || (nrhs > 6)) mexErrMsgTxt("At least 2 parameters is required, all parameters are: Image(2D/3D), Regularisation parameter, alpha1, alpha0, iterations number, Lipshitz Constant");
+    if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); }
+
+    number_of_dims = mxGetNumberOfDimensions(prhs[0]);
+    dim_array = mxGetDimensions(prhs[0]);
+    /* anything else would leave Output unallocated (NULL) for the solver */
+    if ((number_of_dims != 2) && (number_of_dims != 3)) mexErrMsgTxt("The input must be a 2D image or a 3D volume");
+
+    Input = (float *) mxGetData(prhs[0]); /*noisy image (2D/3D) */
+    lambda = (float) mxGetScalar(prhs[1]); /* regularisation parameter */
+    alpha1 = 1.0f; /* parameter to control the first-order term */
+    alpha0 = 2.0f; /* parameter to control the second-order term */
+    iter = 500; /* Iterations number */
+    L2 = 12.0f; /* Lipshitz constant */
+
+    /* nrhs is already known to be in [2, 6], so ">=" covers the optional tail */
+    if (nrhs >= 3) alpha1 = (float) mxGetScalar(prhs[2]); /* parameter to control the first-order term */
+    if (nrhs >= 4) alpha0 = (float) mxGetScalar(prhs[3]); /* parameter to control the second-order term */
+    if (nrhs >= 5) iter = (int) mxGetScalar(prhs[4]); /* Iterations number */
+    if (nrhs == 6) L2 = (float) mxGetScalar(prhs[5]); /* Lipshitz constant */
+
+    /*Handling Matlab output data*/
+    dimX = dim_array[0]; dimY = dim_array[1];
+    /* dim_array only holds number_of_dims entries: never read [2] in the 2D case */
+    dimZ = (number_of_dims == 3) ? dim_array[2] : 1;
+
+    /* mxGetData is the accessor for non-double arrays */
+    Output = (float*)mxGetData(plhs[0] = mxCreateNumericArray(number_of_dims, dim_array, mxSINGLE_CLASS, mxREAL));
+
+    /* running the function */
+    TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ);
+}
diff --git a/src/Matlab/supp/RMSE.m b/src/Matlab/supp/RMSE.m
new file mode 100644
index 0000000..002f776
--- /dev/null
+++ b/src/Matlab/supp/RMSE.m
@@ -0,0 +1,7 @@
+function err = RMSE(signal1, signal2)
+%RMSE Root Mean Squared Error
+%   err = RMSE(signal1, signal2) returns the scalar root mean squared
+%   difference between two arrays with the same number of elements.
+%   Both inputs are flattened with (:), so vectors, 2D images and 3D
+%   volumes are all averaged over every element (sum() on a matrix would
+%   otherwise work column-wise and length() would return only the largest
+%   dimension).
+
+sq_diff = (signal1(:) - signal2(:)).^2;
+err = sum(sq_diff)/numel(signal1); % MSE over all elements
+err = sqrt(err); % RMSE
+
+end \ No newline at end of file
diff --git a/src/Matlab/supp/my_red_yellowMAP.mat b/src/Matlab/supp/my_red_yellowMAP.mat
new file mode 100644
index 0000000..c2a5b87
--- /dev/null
+++ b/src/Matlab/supp/my_red_yellowMAP.mat
Binary files differ
diff --git a/src/Python/CMakeLists.txt b/src/Python/CMakeLists.txt
new file mode 100644
index 0000000..ab95ecc
--- /dev/null
+++ b/src/Python/CMakeLists.txt
@@ -0,0 +1,141 @@
+# Copyright 2018 Edoardo Pasca
+# Project preamble for the Python wrapper build: declares the project, checks
+# for a Python interpreter and defines per-platform compiler flag strings.
+cmake_minimum_required (VERSION 3.0)
+
+project(regulariserPython)
+#https://stackoverflow.com/questions/13298504/using-cmake-with-setup-py
+
+# The version number.
+
+#set (CIL_VERSION $ENV{CIL_VERSION} CACHE INTERNAL "Core Imaging Library version" FORCE)
+message("Creating Python Wrapper")
+# conda orchestrated build
+# CIL_VERSION is not set in this file; it is only echoed here and later
+# forwarded to setup-regularisers.py through the environment.
+message("CIL_VERSION: ${CIL_VERSION}")
+#include (GenerateExportHeader)
+
+# REQUIRED: configuration fails here when no Python interpreter is found.
+find_package(PythonInterp REQUIRED)
+if (PYTHONINTERP_FOUND)
+ message ("Current Python " ${PYTHON_VERSION_STRING} " found " ${PYTHON_EXECUTABLE})
+endif()
+
+
+## Build the regularisers package as a library
+#TODO message("Creating Regularisers as shared library")
+
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+
+# NOTE(review): this unconditionally overrides any CMAKE_BUILD_TYPE given on the
+# command line; the value is reused below to build the Windows LIBRARY_LIB path.
+set(CMAKE_BUILD_TYPE "Release")
+
+# NOTE(review): FLAGS and EXTRA_LIBRARIES are assigned in both branches but are
+# not referenced again anywhere in this file - confirm whether they are still
+# needed (setup-regularisers.py.in expands EXTRA_OMP_LIB, not EXTRA_LIBRARIES).
+if(WIN32)
+ set (FLAGS "/DWIN32 /EHsc /openmp /DCCPiCore_EXPORTS")
+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT.lib")
+
+ set (EXTRA_LIBRARIES)
+
+ # LIBRARY_LIB is not defined in this file; this prints whatever the caller set.
+ message("library lib: ${LIBRARY_LIB}")
+
+elseif(UNIX)
+ set (FLAGS "-fopenmp -O2 -funsigned-char -Wall -Wl,--no-undefined -DCCPiReconstructionIterative_EXPORTS -std=c++0x")
+ set (EXTRA_LIBRARIES
+ "gomp"
+ )
+endif()
+
+# GPU regularisers
+# SETUP_GPU_WRAPPERS holds a block of Python source that configure_file() pastes
+# into setup-regularisers.py in place of the @SETUP_GPU_WRAPPERS@ placeholder:
+#  - BUILD_CUDA set and CUDA found: a second setup() call that builds the
+#    ccpi.filters.gpu_regularisers extension and adds 'cilregcuda' to the
+#    libraries;
+#  - BUILD_CUDA set but CUDA missing: a single Python comment line.
+# NOTE(review): when BUILD_CUDA is not set the variable is not assigned here, so
+# the placeholder presumably expands to an empty string - confirm.
+if (BUILD_CUDA)
+ find_package(CUDA)
+ if (CUDA_FOUND)
+ message("CUDA FOUND")
+ set (SETUP_GPU_WRAPPERS "extra_libraries += ['cilregcuda']\n\
+setup( \n\
+ name='ccpi', \n\
+ description='CCPi Core Imaging Library - Image regularisers GPU',\n\
+ version=cil_version,\n\
+ cmdclass = {'build_ext': build_ext},\n\
+ ext_modules = [Extension('ccpi.filters.gpu_regularisers',\n\
+ sources=[ \n\
+ os.path.join('.' , 'src', 'gpu_regularisers.pyx' ),\n\
+ ],\n\
+ include_dirs=extra_include_dirs, \n\
+ library_dirs=extra_library_dirs, \n\
+ extra_compile_args=extra_compile_args, \n\
+ libraries=extra_libraries ), \n\
+ ],\n\
+ zip_safe = False, \n\
+ packages = {'ccpi','ccpi.filters'},\n\
+ )")
+ else()
+ message("CUDA NOT FOUND")
+ set(SETUP_GPU_WRAPPERS "#CUDA NOT FOUND")
+ endif()
+endif()
+# Generate setup-regularisers.py in the build tree from the .in template.
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in" "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")
+
+
+# Second (non-REQUIRED) interpreter lookup plus the Python libraries; the
+# results are only reported through STATUS messages.
+find_package(PythonInterp)
+find_package(PythonLibs)
+if (PYTHONINTERP_FOUND)
+ message(STATUS "Found PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}")
+ message(STATUS "Python version ${PYTHON_VERSION_STRING}")
+endif()
+if (PYTHONLIBS_FOUND)
+ message(STATUS "Found PYTHON_INCLUDE_DIRS=${PYTHON_INCLUDE_DIRS}")
+ message(STATUS "Found PYTHON_LIBRARIES=${PYTHON_LIBRARIES}")
+endif()
+
+# Build the Cython wrapper by running the generated setup-regularisers.py.
+# Each variant below copies the src/ and ccpi/ directories into the build tree,
+# runs setup.py with CIL_VERSION, PREFIX, LIBRARY_INC and LIBRARY_LIB exported
+# in the environment, and finally touches ${OUTPUT} as a timestamp file.
+if (PYTHONINTERP_FOUND)
+ message("Python found " ${PYTHON_EXECUTABLE})
+ set(SETUP_PY_IN "${CMAKE_CURRENT_SOURCE_DIR}/setup-regularisers.py.in")
+ set(SETUP_PY "${CMAKE_CURRENT_BINARY_DIR}/setup-regularisers.py")
+ #set(DEPS "${CMAKE_CURRENT_SOURCE_DIR}/module/__init__.py")
+ # NOTE(review): DEPS is assigned but not referenced again in this file.
+ set (DEPS "${CMAKE_BINARY_DIR}/src/Core/")
+ set(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp")
+
+ # Same template/output pair as the configure_file() call earlier in this file.
+ configure_file(${SETUP_PY_IN} ${SETUP_PY})
+
+ # NOTE(review): this message prints <binary>/Core/..., while LIBRARY_LIB below
+ # points at <binary>/src/Core - confirm which path is intended.
+ message("Core binary dir " ${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE})
+
+ if (CONDA_BUILD)
+ # conda build: install the package directly with "setup.py install".
+ add_custom_command(OUTPUT ${OUTPUT}
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+ COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+ PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+ LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+ LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core
+ ${PYTHON_EXECUTABLE} ${SETUP_PY} install
+ COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+ DEPENDS cilreg)
+
+ else()
+ if (WIN32)
+ # Windows: LIBRARY_LIB gets a ${CMAKE_BUILD_TYPE} sub-directory appended;
+ # the extension is built in place with "build_ext --inplace".
+ add_custom_command(OUTPUT ${OUTPUT}
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+ COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+ PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+ LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+ LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core/${CMAKE_BUILD_TYPE}
+ ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
+ COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+ DEPENDS cilreg)
+ else()
+ # Other platforms: same in-place build, LIBRARY_LIB without a sub-directory.
+ add_custom_command(OUTPUT ${OUTPUT}
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_BINARY_DIR}/src
+ COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/ccpi ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+ COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+ PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+ LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+ LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core
+ ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
+ COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+ DEPENDS cilreg)
+ endif()
+ # Non-conda builds install the in-place built ccpi directory.
+ # PYTHON_DEST is not defined in this file.
+ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ccpi
+ DESTINATION ${PYTHON_DEST})
+ endif()
+
+
+ # ALL target that depends on the timestamp file, so the custom command above
+ # runs as part of the default build.
+ add_custom_target(PythonWrapper ALL DEPENDS ${OUTPUT})
+
+ #install(CODE "execute_process(COMMAND ${PYTHON} ${SETUP_PY} install)")
+endif()
diff --git a/src/Python/ccpi/__init__.py b/src/Python/ccpi/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/Python/ccpi/__init__.py
diff --git a/src/Python/ccpi/filters/__init__.py b/src/Python/ccpi/filters/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/Python/ccpi/filters/__init__.py
diff --git a/src/Python/ccpi/filters/regularisers.py b/src/Python/ccpi/filters/regularisers.py
new file mode 100644
index 0000000..588ea32
--- /dev/null
+++ b/src/Python/ccpi/filters/regularisers.py
@@ -0,0 +1,214 @@
+"""
+script which assigns a proper device core function based on a flag ('cpu' or 'gpu')
+"""
+
+from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU, TGV_CPU, LLT_ROF_CPU, PATCHSEL_CPU, NLTV_CPU
+try:
+ from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU, PATCHSEL_GPU
+ gpu_enabled = True
+except ImportError:
+ gpu_enabled = False
+from ccpi.filters.cpu_regularisers import NDF_INPAINT_CPU, NVM_INPAINT_CPU
+
+def ROF_TV(inputData, regularisation_parameter, iterations,
+ time_marching_parameter,device='cpu'):
+ if device == 'cpu':
+ return TV_ROF_CPU(inputData,
+ regularisation_parameter,
+ iterations,
+ time_marching_parameter)
+ elif device == 'gpu' and gpu_enabled:
+ return TV_ROF_GPU(inputData,
+ regularisation_parameter,
+ iterations,
+ time_marching_parameter)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+
+def FGP_TV(inputData, regularisation_parameter,iterations,
+ tolerance_param, methodTV, nonneg, printM, device='cpu'):
+ if device == 'cpu':
+ return TV_FGP_CPU(inputData,
+ regularisation_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM)
+ elif device == 'gpu' and gpu_enabled:
+ return TV_FGP_GPU(inputData,
+ regularisation_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ nonneg,
+ printM)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+def SB_TV(inputData, regularisation_parameter, iterations,
+ tolerance_param, methodTV, printM, device='cpu'):
+ if device == 'cpu':
+ return TV_SB_CPU(inputData,
+ regularisation_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ printM)
+ elif device == 'gpu' and gpu_enabled:
+ return TV_SB_GPU(inputData,
+ regularisation_parameter,
+ iterations,
+ tolerance_param,
+ methodTV,
+ printM)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+def FGP_dTV(inputData, refdata, regularisation_parameter, iterations,
+ tolerance_param, eta_const, methodTV, nonneg, printM, device='cpu'):
+ if device == 'cpu':
+ return dTV_FGP_CPU(inputData,
+ refdata,
+ regularisation_parameter,
+ iterations,
+ tolerance_param,
+ eta_const,
+ methodTV,
+ nonneg,
+ printM)
+ elif device == 'gpu' and gpu_enabled:
+ return dTV_FGP_GPU(inputData,
+ refdata,
+ regularisation_parameter,
+ iterations,
+ tolerance_param,
+ eta_const,
+ methodTV,
+ nonneg,
+ printM)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+def TNV(inputData, regularisation_parameter, iterations, tolerance_param):
+ return TNV_CPU(inputData,
+ regularisation_parameter,
+ iterations,
+ tolerance_param)
+def NDF(inputData, regularisation_parameter, edge_parameter, iterations,
+ time_marching_parameter, penalty_type, device='cpu'):
+ if device == 'cpu':
+ return NDF_CPU(inputData,
+ regularisation_parameter,
+ edge_parameter,
+ iterations,
+ time_marching_parameter,
+ penalty_type)
+ elif device == 'gpu' and gpu_enabled:
+ return NDF_GPU(inputData,
+ regularisation_parameter,
+ edge_parameter,
+ iterations,
+ time_marching_parameter,
+ penalty_type)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+def Diff4th(inputData, regularisation_parameter, edge_parameter, iterations,
+ time_marching_parameter, device='cpu'):
+ if device == 'cpu':
+ return Diff4th_CPU(inputData,
+ regularisation_parameter,
+ edge_parameter,
+ iterations,
+ time_marching_parameter)
+ elif device == 'gpu' and gpu_enabled:
+ return Diff4th_GPU(inputData,
+ regularisation_parameter,
+ edge_parameter,
+ iterations,
+ time_marching_parameter)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+
+def PatchSelect(inputData, searchwindow, patchwindow, neighbours, edge_parameter, device='cpu'):
+ if device == 'cpu':
+ return PATCHSEL_CPU(inputData,
+ searchwindow,
+ patchwindow,
+ neighbours,
+ edge_parameter)
+ elif device == 'gpu' and gpu_enabled:
+ return PATCHSEL_GPU(inputData,
+ searchwindow,
+ patchwindow,
+ neighbours,
+ edge_parameter)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+
+def NLTV(inputData, H_i, H_j, H_k, Weights, regularisation_parameter, iterations):
+ return NLTV_CPU(inputData,
+ H_i,
+ H_j,
+ H_k,
+ Weights,
+ regularisation_parameter,
+ iterations)
+
+def TGV(inputData, regularisation_parameter, alpha1, alpha0, iterations,
+ LipshitzConst, device='cpu'):
+ if device == 'cpu':
+ return TGV_CPU(inputData,
+ regularisation_parameter,
+ alpha1,
+ alpha0,
+ iterations,
+ LipshitzConst)
+ elif device == 'gpu' and gpu_enabled:
+ return TGV_GPU(inputData,
+ regularisation_parameter,
+ alpha1,
+ alpha0,
+ iterations,
+ LipshitzConst)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+def LLT_ROF(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations,
+ time_marching_parameter, device='cpu'):
+ if device == 'cpu':
+ return LLT_ROF_CPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+ elif device == 'gpu' and gpu_enabled:
+ return LLT_ROF_GPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+ else:
+ if not gpu_enabled and device == 'gpu':
+ raise ValueError ('GPU is not available')
+ raise ValueError('Unknown device {0}. Expecting gpu or cpu'\
+ .format(device))
+def NDF_INP(inputData, maskData, regularisation_parameter, edge_parameter, iterations,
+ time_marching_parameter, penalty_type):
+ return NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter,
+ edge_parameter, iterations, time_marching_parameter, penalty_type)
+
+def NVM_INP(inputData, maskData, SW_increment, iterations):
+ return NVM_INPAINT_CPU(inputData, maskData, SW_increment, iterations)
diff --git a/src/Python/setup-regularisers.py.in b/src/Python/setup-regularisers.py.in
new file mode 100644
index 0000000..82d9f9f
--- /dev/null
+++ b/src/Python/setup-regularisers.py.in
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+import setuptools
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+
+import os
+import sys
+import numpy
+import platform
+
+cil_version=os.environ['CIL_VERSION']
+if cil_version == '':
+ print("Please set the environmental variable CIL_VERSION")
+ sys.exit(1)
+
+library_include_path = ""
+library_lib_path = ""
+try:
+ library_include_path = os.environ['LIBRARY_INC']
+ library_lib_path = os.environ['LIBRARY_LIB']
+except:
+ library_include_path = os.environ['PREFIX']+'/include'
+ pass
+
+extra_include_dirs = [numpy.get_include(), library_include_path]
+#extra_library_dirs = [os.path.join(library_include_path, "..", "lib")]
+extra_compile_args = []
+extra_library_dirs = [library_lib_path]
+extra_compile_args = []
+extra_link_args = []
+extra_libraries = ['cilreg']
+
+print ("extra_library_dirs " , extra_library_dirs)
+
+extra_include_dirs += [os.path.join(".." , "Core"),
+ os.path.join(".." , "Core", "regularisers_CPU"),
+ os.path.join(".." , "Core", "inpainters_CPU"),
+ os.path.join(".." , "Core", "regularisers_GPU" , "TV_FGP" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "TV_ROF" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "TV_SB" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "TGV" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "LLTROF" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "NDF" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "dTV_FGP" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "DIFF4th" ) ,
+ os.path.join(".." , "Core", "regularisers_GPU" , "PatchSelect" ) ,
+ "."]
+
+if platform.system() == 'Windows':
+ extra_compile_args[0:] = ['/DWIN32','/EHsc','/DBOOST_ALL_NO_LIB' , '/openmp' ]
+else:
+ extra_compile_args = ['-fopenmp','-O2', '-funsigned-char', '-Wall', '-std=c++0x']
+ extra_libraries += [@EXTRA_OMP_LIB@]
+
+setup(
+ name='ccpi',
+ description='CCPi Core Imaging Library - Image regularisers',
+ version=cil_version,
+ cmdclass = {'build_ext': build_ext},
+ ext_modules = [Extension("ccpi.filters.cpu_regularisers",
+ sources=[os.path.join("." , "src", "cpu_regularisers.pyx" ) ],
+ include_dirs=extra_include_dirs,
+ library_dirs=extra_library_dirs,
+ extra_compile_args=extra_compile_args,
+ libraries=extra_libraries ),
+
+ ],
+ zip_safe = False,
+ packages = {'ccpi','ccpi.filters', 'ccpi.supp'},
+)
+
+
+@SETUP_GPU_WRAPPERS@
diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx
new file mode 100644
index 0000000..11a0617
--- /dev/null
+++ b/src/Python/src/cpu_regularisers.pyx
@@ -0,0 +1,685 @@
+# distutils: language=c++
+"""
+Copyright 2018 CCPi
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Edoardo Pasca, Daniil Kazantsev
+"""
+
+import cython
+import numpy as np
+cimport numpy as np
+
+cdef extern float TV_ROF_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TV_FGP_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+cdef extern float SB_TV_CPU_main(float *Input, float *Output, float lambdaPar, int iterationsNumb, float epsil, int methodTV, int printM, int dimX, int dimY, int dimZ);
+cdef extern float LLT_ROF_CPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TGV_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
+cdef extern float Diffusion_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
+cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int dimX, int dimY, int dimZ);
+cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ);
+cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM);
+cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb);
+
+cdef extern float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ);
+cdef extern float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ);
+cdef extern float TV_energy2D(float *U, float *U0, float *E_val, float lambdaPar, int type, int dimX, int dimY);
+cdef extern float TV_energy3D(float *U, float *U0, float *E_val, float lambdaPar, int type, int dimX, int dimY, int dimZ);
+#****************************************************************#
+#********************** Total-variation ROF *********************#
+#****************************************************************#
+def TV_ROF_CPU(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter):
+    """Dispatch ROF total-variation regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``TV_ROF_2D`` and 3D input to ``TV_ROF_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return TV_ROF_2D(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter)
+    if ndim == 3:
+        return TV_ROF_3D(inputData, regularisation_parameter, iterationsNumb, marching_step_parameter)
+    return None
+
+def TV_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float marching_step_parameter):
+    """Run the ROF-TV iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TV_ROF_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    TV_ROF_CPU_main(&inputData[0,0], &outputData[0,0],
+                    regularisation_parameter,
+                    iterationsNumb,
+                    marching_step_parameter,
+                    size1, size0, 1)
+    return outputData
+
+def TV_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float marching_step_parameter):
+    """Run the ROF-TV iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TV_ROF_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    TV_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                    regularisation_parameter,
+                    iterationsNumb,
+                    marching_step_parameter,
+                    size2, size1, size0)
+    return outputData
+
+#****************************************************************#
+#********************** Total-variation FGP *********************#
+#****************************************************************#
+#******** Total-variation Fast-Gradient-Projection (FGP)*********#
+def TV_FGP_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM):
+    """Dispatch FGP total-variation regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``TV_FGP_2D`` and 3D input to ``TV_FGP_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return TV_FGP_2D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
+    if ndim == 3:
+        return TV_FGP_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, nonneg, printM)
+    return None
+
+def TV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float tolerance_param,
+              int methodTV,
+              int nonneg,
+              int printM):
+    """Run the FGP-TV iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TV_FGP_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    TV_FGP_CPU_main(&inputData[0,0], &outputData[0,0],
+                    regularisation_parameter, iterationsNumb, tolerance_param,
+                    methodTV, nonneg, printM,
+                    size1, size0, 1)
+    return outputData
+
+def TV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+              float regularisation_parameter,
+              int iterationsNumb,
+              float tolerance_param,
+              int methodTV,
+              int nonneg,
+              int printM):
+    """Run the FGP-TV iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TV_FGP_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    TV_FGP_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                    regularisation_parameter, iterationsNumb, tolerance_param,
+                    methodTV, nonneg, printM,
+                    size2, size1, size0)
+    return outputData
+
+#***************************************************************#
+#********************** Total-variation SB *********************#
+#***************************************************************#
+#*************** Total-variation Split Bregman (SB)*************#
+def TV_SB_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM):
+    """Dispatch Split-Bregman total-variation regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``TV_SB_2D`` and 3D input to ``TV_SB_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return TV_SB_2D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
+    if ndim == 3:
+        return TV_SB_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param, methodTV, printM)
+    return None
+
+def TV_SB_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+             float regularisation_parameter,
+             int iterationsNumb,
+             float tolerance_param,
+             int methodTV,
+             int printM):
+    """Run the SB-TV iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``SB_TV_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    SB_TV_CPU_main(&inputData[0,0], &outputData[0,0],
+                   regularisation_parameter, iterationsNumb, tolerance_param,
+                   methodTV, printM,
+                   size1, size0, 1)
+    return outputData
+
+def TV_SB_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+             float regularisation_parameter,
+             int iterationsNumb,
+             float tolerance_param,
+             int methodTV,
+             int printM):
+    """Run the SB-TV iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``SB_TV_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    SB_TV_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                   regularisation_parameter, iterationsNumb, tolerance_param,
+                   methodTV, printM,
+                   size2, size1, size0)
+    return outputData
+
+#***************************************************************#
+#***************** Total Generalised Variation *****************#
+#***************************************************************#
+def TGV_CPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
+    """Dispatch Total Generalised Variation regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``TGV_2D`` and 3D input to ``TGV_3D``; any other
+    dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return TGV_2D(inputData, regularisation_parameter, alpha1, alpha0,
+                      iterations, LipshitzConst)
+    if ndim == 3:
+        return TGV_3D(inputData, regularisation_parameter, alpha1, alpha0,
+                      iterations, LipshitzConst)
+    return None
+
+def TGV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+           float regularisation_parameter,
+           float alpha1,
+           float alpha0,
+           int iterationsNumb,
+           float LipshitzConst):
+    """Run the TGV iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TGV_main`` routine as its output buffer and
+    returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    TGV_main(&inputData[0,0], &outputData[0,0],
+             regularisation_parameter, alpha1, alpha0,
+             iterationsNumb, LipshitzConst,
+             size1, size0, 1)
+    return outputData
+def TGV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           float alpha1,
+           float alpha0,
+           int iterationsNumb,
+           float LipshitzConst):
+    """Run the TGV iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TGV_main`` routine as its output buffer and
+    returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    TGV_main(&inputData[0,0,0], &outputData[0,0,0],
+             regularisation_parameter, alpha1, alpha0,
+             iterationsNumb, LipshitzConst,
+             size2, size1, size0)
+    return outputData
+
+#***************************************************************#
+#******************* ROF - LLT regularisation ******************#
+#***************************************************************#
+def LLT_ROF_CPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter):
+    """Dispatch ROF-LLT regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``LLT_ROF_2D`` and 3D input to ``LLT_ROF_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return LLT_ROF_2D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+    if ndim == 3:
+        return LLT_ROF_3D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+    return None
+
+def LLT_ROF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               float regularisation_parameterROF,
+               float regularisation_parameterLLT,
+               int iterations,
+               float time_marching_parameter):
+    """Run the ROF-LLT iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``LLT_ROF_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    LLT_ROF_CPU_main(&inputData[0,0], &outputData[0,0],
+                     regularisation_parameterROF, regularisation_parameterLLT,
+                     iterations, time_marching_parameter,
+                     size1, size0, 1)
+    return outputData
+
+def LLT_ROF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               float regularisation_parameterROF,
+               float regularisation_parameterLLT,
+               int iterations,
+               float time_marching_parameter):
+    """Run the ROF-LLT iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``LLT_ROF_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    LLT_ROF_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                     regularisation_parameterROF, regularisation_parameterLLT,
+                     iterations, time_marching_parameter,
+                     size2, size1, size0)
+    return outputData
+
+#****************************************************************#
+#**************Directional Total-variation FGP ******************#
+#****************************************************************#
+#******** Directional TV Fast-Gradient-Projection (FGP)*********#
+def dTV_FGP_CPU(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM):
+    """Dispatch directional-TV (FGP) regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``dTV_FGP_2D`` and 3D input to ``dTV_FGP_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return dTV_FGP_2D(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM)
+    if ndim == 3:
+        return dTV_FGP_3D(inputData, refdata, regularisation_parameter, iterationsNumb, tolerance_param, eta_const, methodTV, nonneg, printM)
+    return None
+
+def dTV_FGP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
+               float regularisation_parameter,
+               int iterationsNumb,
+               float tolerance_param,
+               float eta_const,
+               int methodTV,
+               int nonneg,
+               int printM):
+    """Run the FGP-dTV iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``dTV_FGP_CPU_main`` routine as its output buffer
+    together with the reference array, and returned.
+
+    Raises:
+        ValueError: if ``refdata`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    # A single set of extents describes both input buffers to the external
+    # routine, so a differently shaped reference cannot be expressed and a
+    # smaller one would presumably be read past its end.
+    if refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1]:
+        raise ValueError("refdata must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    dTV_FGP_CPU_main(&inputData[0,0], &refdata[0,0], &outputData[0,0], regularisation_parameter,
+                     iterationsNumb,
+                     tolerance_param,
+                     eta_const,
+                     methodTV,
+                     nonneg,
+                     printM,
+                     dims[1], dims[0], 1)
+
+    return outputData
+
+def dTV_FGP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
+               float regularisation_parameter,
+               int iterationsNumb,
+               float tolerance_param,
+               float eta_const,
+               int methodTV,
+               int nonneg,
+               int printM):
+    """Run the FGP-dTV iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``dTV_FGP_CPU_main`` routine as its output buffer
+    together with the reference array, and returned.
+
+    Raises:
+        ValueError: if ``refdata`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    # A single set of extents describes both input buffers to the external
+    # routine, so a differently shaped reference cannot be expressed and a
+    # smaller one would presumably be read past its end.
+    if (refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1]
+            or refdata.shape[2] != dims[2]):
+        raise ValueError("refdata must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0], dims[1], dims[2]], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    dTV_FGP_CPU_main(&inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0], regularisation_parameter,
+                     iterationsNumb,
+                     tolerance_param,
+                     eta_const,
+                     methodTV,
+                     nonneg,
+                     printM,
+                     dims[2], dims[1], dims[0])
+    return outputData
+
+#****************************************************************#
+#*********************Total Nuclear Variation********************#
+#****************************************************************#
+def TNV_CPU(inputData, regularisation_parameter, iterationsNumb, tolerance_param):
+    """Dispatch Total Nuclear Variation regularisation on ``inputData.ndim``.
+
+    Only 3D input is processed (via ``TNV_3D``); 2D input, like any other
+    dimensionality, yields ``None``.
+    """
+    if inputData.ndim == 3:
+        return TNV_3D(inputData, regularisation_parameter, iterationsNumb, tolerance_param)
+    # No 2D variant exists in this module.
+    return None
+
+def TNV_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           int iterationsNumb,
+           float tolerance_param):
+    """Run the TNV iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``TNV_CPU_main`` routine as its output buffer and
+    returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # 3D (X,Y,Channels) data; extents are passed last axis first.
+    TNV_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                 regularisation_parameter, iterationsNumb, tolerance_param,
+                 size2, size1, size0)
+    return outputData
+#****************************************************************#
+#***************Nonlinear (Isotropic) Diffusion******************#
+#****************************************************************#
+def NDF_CPU(inputData, regularisation_parameter, edge_parameter, iterationsNumb,time_marching_parameter, penalty_type):
+    """Dispatch nonlinear diffusion regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``NDF_2D`` and 3D input to ``NDF_3D``; any other
+    dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return NDF_2D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
+    if ndim == 3:
+        return NDF_3D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
+    return None
+
+def NDF_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+           float regularisation_parameter,
+           float edge_parameter,
+           int iterationsNumb,
+           float time_marching_parameter,
+           int penalty_type):
+    """Run the nonlinear diffusion iterations on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Diffusion_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    Diffusion_CPU_main(&inputData[0,0], &outputData[0,0],
+                       regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter, penalty_type,
+                       size1, size0, 1)
+    return outputData
+
+def NDF_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           float edge_parameter,
+           int iterationsNumb,
+           float time_marching_parameter,
+           int penalty_type):
+    """Run the nonlinear diffusion iterations on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Diffusion_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    Diffusion_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                       regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter, penalty_type,
+                       size2, size1, size0)
+    return outputData
+
+#****************************************************************#
+#*************Anisotropic Fourth-Order diffusion*****************#
+#****************************************************************#
+def Diff4th_CPU(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter):
+    """Dispatch anisotropic fourth-order diffusion on ``inputData.ndim``.
+
+    2D input is forwarded to ``Diff4th_2D`` and 3D input to ``Diff4th_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return Diff4th_2D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter)
+    if ndim == 3:
+        return Diff4th_3D(inputData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter)
+    return None
+
+def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter):
+    """Run anisotropic fourth-order diffusion on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Diffus4th_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # Extents go to the core last axis first; the third extent is 1 for 2D.
+    Diffus4th_CPU_main(&inputData[0,0], &outputData[0,0],
+                       regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter,
+                       size1, size0, 1)
+    return outputData
+
+def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter):
+    """Run anisotropic fourth-order diffusion on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Diffus4th_CPU_main`` routine as its output buffer
+    and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    cdef long size2 = inputData.shape[2]
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = np.zeros(
+        [size0, size1, size2], dtype='float32')
+
+    # Extents go to the core in reverse axis order (last axis first).
+    Diffus4th_CPU_main(&inputData[0,0,0], &outputData[0,0,0],
+                       regularisation_parameter, edge_parameter,
+                       iterationsNumb, time_marching_parameter,
+                       size2, size1, size0)
+    return outputData
+
+#****************************************************************#
+#***************Patch-based weights calculation******************#
+#****************************************************************#
+def PATCHSEL_CPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
+    """Dispatch patch-based weight calculation on ``inputData.ndim``.
+
+    2D input is forwarded to ``PatchSel_2D``. 3D input is not processed and
+    yields the placeholder value ``1``; any other dimensionality yields
+    ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
+    if ndim == 3:
+        return 1
+    return None
+def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                int searchwindow,
+                int patchwindow,
+                int neighbours,
+                float edge_parameter):
+    """Compute patch-based weights and neighbour indices for a 2D float32 array.
+
+    Three zero-initialised arrays of shape
+    ``(neighbours, inputData.shape[0], inputData.shape[1])`` are allocated
+    (float32 weights and two uint16 index arrays), handed to the external
+    ``PatchSelect_CPU_main`` routine and returned as ``(H_i, H_j, Weights)``.
+    """
+    cdef long n_neighb = neighbours
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = np.zeros(
+        [n_neighb, size0, size1], dtype='float32')
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = np.zeros(
+        [n_neighb, size0, size1], dtype='uint16')
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = np.zeros(
+        [n_neighb, size0, size1], dtype='uint16')
+
+    # NOTE(review): H_j is passed in the extern signature's H_i slot and H_i in
+    # both the H_j and H_k slots; the third extent is 0. Presumably deliberate
+    # for the 2D case - confirm against the C core before changing.
+    PatchSelect_CPU_main(&inputData[0,0],
+                         &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0],
+                         &Weights[0,0,0],
+                         size1, size0, 0,
+                         searchwindow, patchwindow, neighbours,
+                         edge_parameter, 1)
+    return H_i, H_j, Weights
+"""
+def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+ int searchwindow,
+ int patchwindow,
+ int neighbours,
+ float edge_parameter):
+ cdef long dims[4]
+ dims[0] = inputData.shape[0]
+ dims[1] = inputData.shape[1]
+ dims[2] = inputData.shape[2]
+ dims[3] = neighbours
+
+ cdef np.ndarray[np.float32_t, ndim=4, mode="c"] Weights = \
+ np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='float32')
+
+ cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_i = \
+ np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
+
+ cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_j = \
+ np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
+
+ cdef np.ndarray[np.uint16_t, ndim=4, mode="c"] H_k = \
+ np.zeros([dims[3],dims[0],dims[1],dims[2]], dtype='uint16')
+
+ # Run patch-based weight selection function
+ PatchSelect_CPU_main(&inputData[0,0,0], &H_i[0,0,0,0], &H_j[0,0,0,0], &H_k[0,0,0,0], &Weights[0,0,0,0], dims[2], dims[1], dims[0], searchwindow, patchwindow, neighbours, edge_parameter, 1)
+ return H_i, H_j, H_k, Weights
+"""
+
+#****************************************************************#
+#***************Non-local Total Variation******************#
+#****************************************************************#
+def NLTV_CPU(inputData, H_i, H_j, H_k, Weights, regularisation_parameter, iterations):
+    """Dispatch non-local total-variation regularisation on ``inputData.ndim``.
+
+    2D input is forwarded to ``NLTV_2D`` (``H_k`` is not used on that path).
+    3D input is not processed and yields the placeholder value ``1``; any
+    other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return NLTV_2D(inputData, H_i, H_j, Weights, regularisation_parameter, iterations)
+    if ndim == 3:
+        return 1
+    return None
+def NLTV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+            np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i,
+            np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j,
+            np.ndarray[np.float32_t, ndim=3, mode="c"] Weights,
+            float regularisation_parameter,
+            int iterations):
+    """Run non-local TV regularisation on a 2D C-contiguous float32 array.
+
+    The number of neighbours is taken from the first axis of ``H_i``. A
+    zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Nonlocal_TV_CPU_main`` routine as its output
+    buffer and returned.
+    """
+    cdef long size0 = inputData.shape[0]
+    cdef long size1 = inputData.shape[1]
+    neighbours = H_i.shape[0]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = np.zeros(
+        [size0, size1], dtype='float32')
+
+    # H_i is also passed in the H_k slot and the third extent is 0 for 2D data.
+    Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0],
+                         &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0],
+                         &Weights[0,0,0],
+                         size1, size0, 0,
+                         neighbours, regularisation_parameter, iterations)
+    return outputData
+
+#*********************Inpainting WITH****************************#
+#***************Nonlinear (Isotropic) Diffusion******************#
+#****************************************************************#
+def NDF_INPAINT_CPU(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type):
+    """Dispatch diffusion-based inpainting on ``inputData.ndim``.
+
+    2D input is forwarded to ``NDF_INP_2D`` and 3D input to ``NDF_INP_3D``;
+    any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return NDF_INP_2D(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
+    if ndim == 3:
+        return NDF_INP_3D(inputData, maskData, regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type)
+    return None
+
+def NDF_INP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter,
+               int penalty_type):
+    """Run inpainting by nonlinear diffusion on a 2D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Diffusion_Inpaint_CPU_main`` routine as its
+    output buffer together with the uint8 mask, and returned.
+
+    Raises:
+        ValueError: if ``maskData`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    # A single set of extents describes both the image and the mask to the
+    # external routine, so a smaller mask would presumably be read past its end.
+    if maskData.shape[0] != dims[0] or maskData.shape[1] != dims[1]:
+        raise ValueError("maskData must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+
+    # Run inpainting by diffusion iterations for 2D data
+    Diffusion_Inpaint_CPU_main(&inputData[0,0], &maskData[0,0], &outputData[0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[1], dims[0], 1)
+    return outputData
+
+def NDF_INP_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               np.ndarray[np.uint8_t, ndim=3, mode="c"] maskData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter,
+               int penalty_type):
+    """Run inpainting by nonlinear diffusion on a 3D C-contiguous float32 array.
+
+    A zero-initialised array with the shape of ``inputData`` is allocated,
+    handed to the external ``Diffusion_Inpaint_CPU_main`` routine as its
+    output buffer together with the uint8 mask, and returned.
+
+    Raises:
+        ValueError: if ``maskData`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    # A single set of extents describes both the image and the mask to the
+    # external routine, so a smaller mask would presumably be read past its end.
+    if (maskData.shape[0] != dims[0] or maskData.shape[1] != dims[1]
+            or maskData.shape[2] != dims[2]):
+        raise ValueError("maskData must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+    # Run inpainting by diffusion iterations for 3D data
+    Diffusion_Inpaint_CPU_main(&inputData[0,0,0], &maskData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, penalty_type, dims[2], dims[1], dims[0])
+
+    return outputData
+#*********************Inpainting WITH****************************#
+#***************Nonlocal Vertical Marching method****************#
+#****************************************************************#
+def NVM_INPAINT_CPU(inputData, maskData, SW_increment, iterationsNumb):
+    """Dispatch nonlocal vertical marching inpainting on ``inputData.ndim``.
+
+    Only 2D input is processed (via ``NVM_INP_2D``); 3D input, like any other
+    dimensionality, yields ``None``.
+    """
+    if inputData.ndim == 2:
+        return NVM_INP_2D(inputData, maskData, SW_increment, iterationsNumb)
+    # No 3D variant exists in this module.
+    return None
+
+def NVM_INP_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData,
+               int SW_increment,
+               int iterationsNumb):
+    """Run nonlocal vertical marching inpainting on a 2D float32 array.
+
+    Two zero-initialised arrays with the shape of ``inputData`` are allocated
+    (a float32 result and a uint8 updated mask), handed to the external
+    ``NonlocalMarching_Inpaint_main`` routine and returned as
+    ``(outputData, maskData_upd)``.
+
+    Raises:
+        ValueError: if ``maskData`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    # A single set of extents describes both the image and the mask to the
+    # external routine, so a smaller mask would presumably be read past its end.
+    if maskData.shape[0] != dims[0] or maskData.shape[1] != dims[1]:
+        raise ValueError("maskData must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+            np.zeros([dims[0],dims[1]], dtype='float32')
+
+    cdef np.ndarray[np.uint8_t, ndim=2, mode="c"] maskData_upd = \
+            np.zeros([dims[0],dims[1]], dtype='uint8')
+
+    # Run inpainting by the nonlocal vertical marching method for 2D data
+    NonlocalMarching_Inpaint_main(&inputData[0,0], &maskData[0,0], &outputData[0,0],
+                                  &maskData_upd[0,0],
+                                  SW_increment, iterationsNumb, 1, dims[1], dims[0], 1)
+
+    return (outputData, maskData_upd)
+
+
+#****************************************************************#
+#***************Calculation of TV-energy functional**************#
+#****************************************************************#
+def TV_ENERGY(inputData, inputData0, regularisation_parameter, typeFunctional):
+    """Dispatch the TV-energy functional calculation on ``inputData.ndim``.
+
+    2D input is forwarded to ``TV_ENERGY_2D`` and 3D input to
+    ``TV_ENERGY_3D``; any other dimensionality yields ``None``.
+    """
+    ndim = inputData.ndim
+    if ndim == 2:
+        return TV_ENERGY_2D(inputData, inputData0, regularisation_parameter, typeFunctional)
+    if ndim == 3:
+        return TV_ENERGY_3D(inputData, inputData0, regularisation_parameter, typeFunctional)
+    return None
+
+def TV_ENERGY_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                 np.ndarray[np.float32_t, ndim=2, mode="c"] inputData0,
+                 float regularisation_parameter,
+                 int typeFunctional):
+    """Evaluate the TV-energy functional for a pair of 2D float32 arrays.
+
+    A one-element float32 array is allocated, handed to the external
+    ``TV_energy2D`` routine as its result buffer and returned.
+
+    Raises:
+        ValueError: if ``inputData0`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    # A single set of extents describes both arrays to the external routine,
+    # so a smaller second array would presumably be read past its end.
+    if inputData0.shape[0] != dims[0] or inputData0.shape[1] != dims[1]:
+        raise ValueError("inputData0 must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=1, mode="c"] outputData = \
+            np.zeros([1], dtype='float32')
+
+    # Extents are passed last axis first.
+    TV_energy2D(&inputData[0,0], &inputData0[0,0], &outputData[0], regularisation_parameter, typeFunctional, dims[1], dims[0])
+
+    return outputData
+
+def TV_ENERGY_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+                 np.ndarray[np.float32_t, ndim=3, mode="c"] inputData0,
+                 float regularisation_parameter,
+                 int typeFunctional):
+    """Evaluate the TV-energy functional for a pair of 3D float32 arrays.
+
+    A one-element float32 array is allocated, handed to the external
+    ``TV_energy3D`` routine as its result buffer and returned.
+
+    Raises:
+        ValueError: if ``inputData0`` does not have the shape of ``inputData``.
+    """
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    # A single set of extents describes both arrays to the external routine,
+    # so a smaller second array would presumably be read past its end.
+    if (inputData0.shape[0] != dims[0] or inputData0.shape[1] != dims[1]
+            or inputData0.shape[2] != dims[2]):
+        raise ValueError("inputData0 must have the same shape as inputData")
+
+    cdef np.ndarray[np.float32_t, ndim=1, mode="c"] outputData = \
+            np.zeros([1], dtype='float32')
+
+    # Extents are passed in reverse axis order (last axis first).
+    TV_energy3D(&inputData[0,0,0], &inputData0[0,0,0], &outputData[0], regularisation_parameter, typeFunctional, dims[2], dims[1], dims[0])
+
+    return outputData
diff --git a/src/Python/src/gpu_regularisers.pyx b/src/Python/src/gpu_regularisers.pyx
new file mode 100644
index 0000000..b52f669
--- /dev/null
+++ b/src/Python/src/gpu_regularisers.pyx
@@ -0,0 +1,640 @@
+# distutils: language=c++
+"""
+Copyright 2018 CCPi
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Author: Edoardo Pasca, Daniil Kazantsev
+"""
+
+import cython
+import numpy as np
+cimport numpy as np
+
+# Message of the ValueError raised by the wrappers in this module whenever one
+# of the external GPU entry points below returns a non-zero status.
+CUDAErrorMessage = 'CUDA error'
+
+# External GPU entry points. Each returns an int; the wrappers in this module
+# treat 0 as success and anything else as a failure. For the routines taking
+# three trailing int extents, the wrappers pass the array shape last-axis
+# first, with the third extent set to 1 for 2D input.
+cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
+cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
+cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
+cdef extern int NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z);
+cdef extern int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z);
+# 2D only: takes two extents followed by the window/neighbour parameters.
+cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h);
+
+# Total-variation Rudin-Osher-Fatemi (ROF)
+def TV_ROF_GPU(inputData,
+               regularisation_parameter,
+               iterations,
+               time_marching_parameter):
+    """Total-variation Rudin-Osher-Fatemi (ROF) on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``ROFTV2D`` or ``ROFTV3D`` and
+    returns that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return ROFTV2D(inputData,
+                       regularisation_parameter,
+                       iterations,
+                       time_marching_parameter)
+    elif inputData.ndim == 3:
+        return ROFTV3D(inputData,
+                       regularisation_parameter,
+                       iterations,
+                       time_marching_parameter)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+
+# Total-variation Fast-Gradient-Projection (FGP)
+def TV_FGP_GPU(inputData,
+               regularisation_parameter,
+               iterations,
+               tolerance_param,
+               methodTV,
+               nonneg,
+               printM):
+    """Total-variation Fast-Gradient-Projection (FGP) on the GPU, 2D or 3D.
+
+    Dispatches on ``inputData.ndim`` to ``FGPTV2D`` or ``FGPTV3D`` and
+    returns that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return FGPTV2D(inputData,
+                       regularisation_parameter,
+                       iterations,
+                       tolerance_param,
+                       methodTV,
+                       nonneg,
+                       printM)
+    elif inputData.ndim == 3:
+        return FGPTV3D(inputData,
+                       regularisation_parameter,
+                       iterations,
+                       tolerance_param,
+                       methodTV,
+                       nonneg,
+                       printM)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+# Total-variation Split Bregman (SB)
+def TV_SB_GPU(inputData,
+              regularisation_parameter,
+              iterations,
+              tolerance_param,
+              methodTV,
+              printM):
+    """Total-variation Split Bregman (SB) on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``SBTV2D`` or ``SBTV3D`` and returns
+    that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return SBTV2D(inputData,
+                      regularisation_parameter,
+                      iterations,
+                      tolerance_param,
+                      methodTV,
+                      printM)
+    elif inputData.ndim == 3:
+        return SBTV3D(inputData,
+                      regularisation_parameter,
+                      iterations,
+                      tolerance_param,
+                      methodTV,
+                      printM)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+# LLT-ROF model
+def LLT_ROF_GPU(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter):
+    """LLT-ROF model on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``LLT_ROF_GPU2D`` or
+    ``LLT_ROF_GPU3D`` and returns that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return LLT_ROF_GPU2D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+    elif inputData.ndim == 3:
+        return LLT_ROF_GPU3D(inputData, regularisation_parameterROF, regularisation_parameterLLT, iterations, time_marching_parameter)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+# Total Generalised Variation (TGV)
+def TGV_GPU(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst):
+    """Total Generalised Variation (TGV) on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``TGV2D`` or ``TGV3D`` and returns
+    that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return TGV2D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
+    elif inputData.ndim == 3:
+        return TGV3D(inputData, regularisation_parameter, alpha1, alpha0, iterations, LipshitzConst)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+# Directional Total-variation Fast-Gradient-Projection (FGP)
+def dTV_FGP_GPU(inputData,
+                refdata,
+                regularisation_parameter,
+                iterations,
+                tolerance_param,
+                eta_const,
+                methodTV,
+                nonneg,
+                printM):
+    """Directional total-variation FGP on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``FGPdTV2D`` or ``FGPdTV3D`` and
+    returns that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return FGPdTV2D(inputData,
+                        refdata,
+                        regularisation_parameter,
+                        iterations,
+                        tolerance_param,
+                        eta_const,
+                        methodTV,
+                        nonneg,
+                        printM)
+    elif inputData.ndim == 3:
+        return FGPdTV3D(inputData,
+                        refdata,
+                        regularisation_parameter,
+                        iterations,
+                        tolerance_param,
+                        eta_const,
+                        methodTV,
+                        nonneg,
+                        printM)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+# Nonlinear Isotropic Diffusion (NDF)
+def NDF_GPU(inputData,
+            regularisation_parameter,
+            edge_parameter,
+            iterations,
+            time_marching_parameter,
+            penalty_type):
+    """Nonlinear isotropic diffusion (NDF) on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``NDF_GPU_2D`` or ``NDF_GPU_3D`` and
+    returns that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return NDF_GPU_2D(inputData,
+                          regularisation_parameter,
+                          edge_parameter,
+                          iterations,
+                          time_marching_parameter,
+                          penalty_type)
+    elif inputData.ndim == 3:
+        return NDF_GPU_3D(inputData,
+                          regularisation_parameter,
+                          edge_parameter,
+                          iterations,
+                          time_marching_parameter,
+                          penalty_type)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+# Anisotropic Fourth-Order diffusion
+def Diff4th_GPU(inputData,
+                regularisation_parameter,
+                edge_parameter,
+                iterations,
+                time_marching_parameter):
+    """Anisotropic fourth-order diffusion on the GPU for 2D or 3D data.
+
+    Dispatches on ``inputData.ndim`` to ``Diff4th_2D`` or ``Diff4th_3D`` and
+    returns that wrapper's result.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from the wrapper.
+    """
+    if inputData.ndim == 2:
+        return Diff4th_2D(inputData,
+                          regularisation_parameter,
+                          edge_parameter,
+                          iterations,
+                          time_marching_parameter)
+    elif inputData.ndim == 3:
+        return Diff4th_3D(inputData,
+                          regularisation_parameter,
+                          edge_parameter,
+                          iterations,
+                          time_marching_parameter)
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+
+#****************************************************************#
+#********************** Total-variation ROF *********************#
+#****************************************************************#
+def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+            float regularisation_parameter,
+            int iterations,
+            float time_marching_parameter):
+    """Run ``TV_ROF_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = TV_ROF_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameter,
+        iterations,
+        time_marching_parameter,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+            float regularisation_parameter,
+            int iterations,
+            float time_marching_parameter):
+    """Run ``TV_ROF_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = TV_ROF_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameter,
+        iterations,
+        time_marching_parameter,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+#****************************************************************#
+#********************** Total-variation FGP *********************#
+#****************************************************************#
+#******** Total-variation Fast-Gradient-Projection (FGP)*********#
+def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+            float regularisation_parameter,
+            int iterations,
+            float tolerance_param,
+            int methodTV,
+            int nonneg,
+            int printM):
+    """Run ``TV_FGP_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = TV_FGP_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameter,
+        iterations,
+        tolerance_param,
+        methodTV,
+        nonneg,
+        printM,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+            float regularisation_parameter,
+            int iterations,
+            float tolerance_param,
+            int methodTV,
+            int nonneg,
+            int printM):
+    """Run ``TV_FGP_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = TV_FGP_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameter,
+        iterations,
+        tolerance_param,
+        methodTV,
+        nonneg,
+        printM,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+#***************************************************************#
+#********************** Total-variation SB *********************#
+#***************************************************************#
+#*************** Total-variation Split Bregman (SB)*************#
+def SBTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+           float regularisation_parameter,
+           int iterations,
+           float tolerance_param,
+           int methodTV,
+           int printM):
+    """Run ``TV_SB_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = TV_SB_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameter,
+        iterations,
+        tolerance_param,
+        methodTV,
+        printM,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+def SBTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+           float regularisation_parameter,
+           int iterations,
+           float tolerance_param,
+           int methodTV,
+           int printM):
+    """Run ``TV_SB_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = TV_SB_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameter,
+        iterations,
+        tolerance_param,
+        methodTV,
+        printM,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+#***************************************************************#
+#************************ LLT-ROF model ************************#
+#***************************************************************#
+#************Joint LLT-ROF model for higher order **************#
+def LLT_ROF_GPU2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                  float regularisation_parameterROF,
+                  float regularisation_parameterLLT,
+                  int iterations,
+                  float time_marching_parameter):
+    """Run ``LLT_ROF_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = LLT_ROF_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameterROF,
+        regularisation_parameterLLT,
+        iterations,
+        time_marching_parameter,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+def LLT_ROF_GPU3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+                  float regularisation_parameterROF,
+                  float regularisation_parameterLLT,
+                  int iterations,
+                  float time_marching_parameter):
+    """Run ``LLT_ROF_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = LLT_ROF_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameterROF,
+        regularisation_parameterLLT,
+        iterations,
+        time_marching_parameter,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+#***************************************************************#
+#***************** Total Generalised Variation *****************#
+#***************************************************************#
+def TGV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+          float regularisation_parameter,
+          float alpha1,
+          float alpha0,
+          int iterationsNumb,
+          float LipshitzConst):
+    """Run ``TGV_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = TGV_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameter,
+        alpha1,
+        alpha0,
+        iterationsNumb,
+        LipshitzConst,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+def TGV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+          float regularisation_parameter,
+          float alpha1,
+          float alpha0,
+          int iterationsNumb,
+          float LipshitzConst):
+    """Run ``TGV_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = TGV_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameter,
+        alpha1,
+        alpha0,
+        iterationsNumb,
+        LipshitzConst,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+#****************************************************************#
+#**************Directional Total-variation FGP ******************#
+#****************************************************************#
+#******** Directional TV Fast-Gradient-Projection (FGP)*********#
+def FGPdTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+             np.ndarray[np.float32_t, ndim=2, mode="c"] refdata,
+             float regularisation_parameter,
+             int iterations,
+             float tolerance_param,
+             float eta_const,
+             int methodTV,
+             int nonneg,
+             int printM):
+    """Run ``dTV_FGP_GPU_main`` on a 2D float32 array and a reference array.
+
+    Returns:
+        A newly allocated float32 array of the input's shape that the GPU
+        routine writes into.
+
+    Raises:
+        ValueError: if ``refdata`` does not have the shape of ``inputData``,
+            or (with ``CUDAErrorMessage``) if the GPU routine returns a
+            non-zero status.
+    """
+    cdef long dims[2]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+
+    # Only one set of dimensions is passed to the GPU routine, so a
+    # reference buffer of a different shape cannot be indexed safely.
+    if refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1]:
+        raise ValueError('refdata must have the same shape as inputData')
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
+        np.zeros([dims[0],dims[1]], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = dTV_FGP_GPU_main(
+        &inputData[0,0], &refdata[0,0], &outputData[0,0],
+        regularisation_parameter,
+        iterations,
+        tolerance_param,
+        eta_const,
+        methodTV,
+        nonneg,
+        printM,
+        dims[1], dims[0], 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return outputData
+
+
+
+def FGPdTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+             np.ndarray[np.float32_t, ndim=3, mode="c"] refdata,
+             float regularisation_parameter,
+             int iterations,
+             float tolerance_param,
+             float eta_const,
+             int methodTV,
+             int nonneg,
+             int printM):
+    """Run ``dTV_FGP_GPU_main`` on a 3D float32 array and a reference array.
+
+    Returns:
+        A newly allocated float32 array of the input's shape that the GPU
+        routine writes into.
+
+    Raises:
+        ValueError: if ``refdata`` does not have the shape of ``inputData``,
+            or (with ``CUDAErrorMessage``) if the GPU routine returns a
+            non-zero status.
+    """
+    cdef long dims[3]
+    dims[0] = inputData.shape[0]
+    dims[1] = inputData.shape[1]
+    dims[2] = inputData.shape[2]
+
+    # Only one set of dimensions is passed to the GPU routine, so a
+    # reference buffer of a different shape cannot be indexed safely.
+    if (refdata.shape[0] != dims[0] or refdata.shape[1] != dims[1]
+            or refdata.shape[2] != dims[2]):
+        raise ValueError('refdata must have the same shape as inputData')
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
+        np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = dTV_FGP_GPU_main(
+        &inputData[0,0,0], &refdata[0,0,0], &outputData[0,0,0],
+        regularisation_parameter,
+        iterations,
+        tolerance_param,
+        eta_const,
+        methodTV,
+        nonneg,
+        printM,
+        dims[2], dims[1], dims[0])
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return outputData
+
+
+
+#****************************************************************#
+#***************Nonlinear (Isotropic) Diffusion******************#
+#****************************************************************#
+def NDF_GPU_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter,
+               int penalty_type):
+    """Run ``NonlDiff_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # NOTE(review): penalty_type is forwarded unchecked. A disabled check
+    # that used to sit here tested `penalty_type < 1 and penalty_type > 3`,
+    # which can never be true; if validation is reinstated it needs `or`.
+    # Its message listed 1 for Huber, 2 - Perona-Malik, 3 - Tukey Biweight.
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = NonlDiff_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameter,
+        edge_parameter,
+        iterationsNumb,
+        time_marching_parameter,
+        penalty_type,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+def NDF_GPU_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter,
+               int penalty_type):
+    """Run ``NonlDiff_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = NonlDiff_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameter,
+        edge_parameter,
+        iterationsNumb,
+        time_marching_parameter,
+        penalty_type,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+#****************************************************************#
+#************Anisotropic Fourth-Order diffusion******************#
+#****************************************************************#
+def Diff4th_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter):
+    """Run ``Diffus4th_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=2, mode="c"] result = \
+        np.zeros([n0, n1], dtype='float32')
+
+    # Extents go last-axis first; the third extent is 1 for 2D data.
+    cdef int status = Diffus4th_GPU_main(
+        &inputData[0,0], &result[0,0],
+        regularisation_parameter,
+        edge_parameter,
+        iterationsNumb,
+        time_marching_parameter,
+        n1, n0, 1)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+
+def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
+               float regularisation_parameter,
+               float edge_parameter,
+               int iterationsNumb,
+               float time_marching_parameter):
+    """Run ``Diffus4th_GPU_main`` on a 3D float32 C-contiguous array.
+
+    Returns a newly allocated float32 array of the input's shape that the
+    GPU routine writes into. Raises ``ValueError(CUDAErrorMessage)`` when
+    the routine reports a non-zero status.
+    """
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+    cdef long n2 = inputData.shape[2]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] result = \
+        np.zeros([n0, n1, n2], dtype='float32')
+
+    # Extents go last-axis first.
+    cdef int status = Diffus4th_GPU_main(
+        &inputData[0,0,0], &result[0,0,0],
+        regularisation_parameter,
+        edge_parameter,
+        iterationsNumb,
+        time_marching_parameter,
+        n2, n1, n0)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return result
+
+
+#****************************************************************#
+#************Patch-based weights pre-selection******************#
+#****************************************************************#
+def PATCHSEL_GPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter):
+    """Patch-based weights pre-selection on the GPU.
+
+    For 2D input this returns the result of ``PatchSel_2D``. For 3D input
+    the integer 1 is returned, as before.
+
+    Raises:
+        ValueError: if ``inputData`` is neither 2D nor 3D (previously None
+            was returned silently), or propagated from ``PatchSel_2D``.
+    """
+    if inputData.ndim == 2:
+        return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter)
+    elif inputData.ndim == 3:
+        # NOTE(review): looks like a placeholder for a missing 3D
+        # implementation; kept so existing callers see no change -- confirm.
+        return 1
+    raise ValueError('Input data must be 2D or 3D, got {}D'.format(inputData.ndim))
+def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
+                int searchwindow,
+                int patchwindow,
+                int neighbours,
+                float edge_parameter):
+    """Run ``PatchSelect_GPU_main`` on a 2D float32 C-contiguous array.
+
+    Allocates three arrays of shape ``(neighbours, rows, cols)``: float32
+    ``Weights`` and uint16 ``H_i`` / ``H_j``. The GPU routine fills them.
+
+    Returns:
+        The tuple ``(H_i, H_j, Weights)``.
+
+    Raises:
+        ValueError: with ``CUDAErrorMessage`` when the GPU routine returns a
+            non-zero status.
+    """
+    cdef long n_nb = neighbours
+    cdef long n0 = inputData.shape[0]
+    cdef long n1 = inputData.shape[1]
+
+    cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \
+        np.zeros([n_nb, n0, n1], dtype='float32')
+
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \
+        np.zeros([n_nb, n0, n1], dtype='uint16')
+
+    cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \
+        np.zeros([n_nb, n0, n1], dtype='uint16')
+
+    # NOTE(review): H_j is passed in the slot the extern declaration names
+    # H_i, and H_i in the slot named H_j. Presumably deliberate (axis-order
+    # convention of the C side) -- confirm before changing.
+    cdef int status = PatchSelect_GPU_main(
+        &inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0],
+        n1, n0,
+        searchwindow,
+        patchwindow,
+        neighbours,
+        edge_parameter)
+    if status != 0:
+        raise ValueError(CUDAErrorMessage)
+    return H_i, H_j, Weights
+
+