From 3d3a0958fad475c6b0493ad85459e1c04ba4ba62 Mon Sep 17 00:00:00 2001
From: Edoardo Pasca <edo.paskino@gmail.com>
Date: Fri, 6 Dec 2019 17:37:35 +0000
Subject: C lib (#458)

C library implemented with optimised axpy fucntions and gradient operator in c
---
 CMakeLists.txt                                     |   7 +
 Wrappers/Python/CMakeLists.txt                     |  88 +++
 Wrappers/Python/ccpi/framework/framework.py        |  84 ++-
 .../optimisation/operators/GradientOperator.py     | 257 +++++++-
 Wrappers/Python/conda-recipe/bld.bat               |  10 -
 Wrappers/Python/conda-recipe/build.sh              |   9 -
 .../Python/conda-recipe/conda_build_config.yaml    |  10 -
 Wrappers/Python/conda-recipe/meta.yaml             |  43 --
 Wrappers/Python/recipe/bld.bat                     |  10 +
 Wrappers/Python/recipe/build.sh                    |   9 +
 Wrappers/Python/recipe/conda_build_config.yaml     |  10 +
 Wrappers/Python/recipe/meta.yaml                   |  43 ++
 Wrappers/Python/setup-cil.py.in                    |  70 +++
 Wrappers/Python/test/test_DataContainer.py         |  13 +
 Wrappers/Python/test/test_Gradient.py              | 146 ++++-
 Wrappers/Python/test/test_Operator.py              |   3 +-
 build/jenkins-build.sh                             |   3 -
 recipe/bld.bat                                     |  15 +
 recipe/build.sh                                    |  24 +
 recipe/conda_build_config.yaml                     |  10 +
 recipe/meta.yaml                                   |  45 ++
 src/Core/CMakeLists.txt                            | 124 ++++
 src/Core/FiniteDifferenceLibrary.c                 | 692 +++++++++++++++++++++
 src/Core/axpby.c                                   | 109 ++++
 src/Core/include/FiniteDifferenceLibrary.h         |   6 +
 src/Core/include/axpby.h                           |  17 +
 src/Core/include/dll_export.h                      |  20 +
 27 files changed, 1767 insertions(+), 110 deletions(-)
 create mode 100755 CMakeLists.txt
 create mode 100644 Wrappers/Python/CMakeLists.txt
 delete mode 100644 Wrappers/Python/conda-recipe/bld.bat
 delete mode 100644 Wrappers/Python/conda-recipe/build.sh
 delete mode 100644 Wrappers/Python/conda-recipe/conda_build_config.yaml
 delete mode 100644 Wrappers/Python/conda-recipe/meta.yaml
 create mode 100644 Wrappers/Python/recipe/bld.bat
 create mode 100644 Wrappers/Python/recipe/build.sh
 create mode 100644 Wrappers/Python/recipe/conda_build_config.yaml
 create mode 100644 Wrappers/Python/recipe/meta.yaml
 create mode 100644 Wrappers/Python/setup-cil.py.in
 delete mode 100644 build/jenkins-build.sh
 create mode 100644 recipe/bld.bat
 create mode 100644 recipe/build.sh
 create mode 100644 recipe/conda_build_config.yaml
 create mode 100644 recipe/meta.yaml
 create mode 100644 src/Core/CMakeLists.txt
 create mode 100644 src/Core/FiniteDifferenceLibrary.c
 create mode 100755 src/Core/axpby.c
 create mode 100644 src/Core/include/FiniteDifferenceLibrary.h
 create mode 100644 src/Core/include/axpby.h
 create mode 100755 src/Core/include/dll_export.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100755
index 0000000..2df50b5
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,7 @@
+cmake_minimum_required(VERSION 3.4)
+
+project (cil LANGUAGES C CXX)
+
+set(CIL_VERSION $ENV{CIL_VERSION})
+add_subdirectory(src/Core)
+add_subdirectory(Wrappers/Python)
diff --git a/Wrappers/Python/CMakeLists.txt b/Wrappers/Python/CMakeLists.txt
new file mode 100644
index 0000000..f325265
--- /dev/null
+++ b/Wrappers/Python/CMakeLists.txt
@@ -0,0 +1,88 @@
+option (BUILD_PYTHON_WRAPPER "Build Python Wrapper" ON)
+
+if (BUILD_PYTHON_WRAPPER)
+  find_package(PythonInterp REQUIRED)
+  
+  set(PYTHON_DEST_DIR "" CACHE PATH "Directory of the Python wrappers")
+  if (PYTHON_DEST_DIR)
+  set(PYTHON_DEST "${PYTHON_DEST_DIR}")
+  else() 
+  set(PYTHON_DEST "${CMAKE_INSTALL_PREFIX}/python")
+  endif()
+  message(STATUS "Python wrappers will be installed in " ${PYTHON_DEST})
+
+  message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+
+  set(CMAKE_BUILD_TYPE "Release")
+  
+  find_package(PythonLibs)
+  if (PYTHONINTERP_FOUND)
+    message(STATUS "Found PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}")
+    message(STATUS "Python version ${PYTHON_VERSION_STRING}")
+  endif()
+  if (PYTHONLIBS_FOUND)
+    message(STATUS "Found PYTHON_INCLUDE_DIRS=${PYTHON_INCLUDE_DIRS}")
+    message(STATUS "Found PYTHON_LIBRARIES=${PYTHON_LIBRARIES}")
+  endif()
+
+  if (PYTHONINTERP_FOUND)
+      message("Python found " ${PYTHON_EXECUTABLE})
+      set(SETUP_PY_IN "${CMAKE_CURRENT_SOURCE_DIR}/setup-cil.py.in")
+      set(SETUP_PY    "${CMAKE_CURRENT_BINARY_DIR}/setup.py")
+      #set(DEPS        "${CMAKE_CURRENT_SOURCE_DIR}/module/__init__.py")
+      set (DEPS       "${CMAKE_BINARY_DIR}/")
+      set(OUTPUT      "${CMAKE_CURRENT_BINARY_DIR}/build/timestamp")
+
+      #configure_file(${SETUP_PY_IN} ${SETUP_PY})
+
+      message("Core binary dir " ${CMAKE_BINARY_DIR}/Core/${CMAKE_BUILD_TYPE})
+      
+      if (CONDA_BUILD)
+        add_custom_command(OUTPUT ${OUTPUT}
+                        #COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
+                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
+                        COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+                                                        #PREFIX=${CMAKE_SOURCE_DIR}/src/Core
+                                                        #LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/Core
+                                                        #LIBRARY_LIB=${CMAKE_BINARY_DIR}/src/Core
+                                                        ${PYTHON_EXECUTABLE} ${SETUP_PY} -vv install
+                                                        #echo "EDO"
+                        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+                        COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+                        DEPENDS cilacc)
+
+      else()
+        if (WIN32)
+          add_custom_command(OUTPUT ${OUTPUT}
+                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
+                        COMMAND ${CMAKE_COMMAND} -E env CIL_VERSION=${CIL_VERSION}
+                                                        PREFIX=${CMAKE_SOURCE_DIR}/src/
+                                                        LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/include
+                                                        LIBRARY_LIB=${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
+                                                        ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
+                        COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+                        DEPENDS cilacc)
+        else()
+          add_custom_command(OUTPUT ${OUTPUT}
+                        COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
+                        COMMAND ${CMAKE_COMMAND} -E env PREFIX=${CMAKE_SOURCE_DIR}/src/
+                                                        LIBRARY_INC=${CMAKE_SOURCE_DIR}/src/include
+                                                        LIBRARY_LIB=${CMAKE_BINARY_DIR}/
+                                                        ${PYTHON_EXECUTABLE} ${SETUP_PY} build_ext --inplace
+                        COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT}
+                        DEPENDS cilacc
+                        )
+        endif()
+        #set (PYTHON_DEST ${CMAKE_INSTALL_PREFIX}/python/)
+        install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/
+                DESTINATION ${PYTHON_DEST})
+        #file(TOUCH ${PYTHON_DEST}/edo/__init__.py)
+        
+      endif()
+      
+      
+      add_custom_target(PythonWrapper ALL DEPENDS ${OUTPUT})
+      #install(CODE "execute_process(COMMAND ${PYTHON} ${SETUP_PY} install)")
+  endif()
+
+endif()
\ No newline at end of file
diff --git a/Wrappers/Python/ccpi/framework/framework.py b/Wrappers/Python/ccpi/framework/framework.py
index 01ce7ef..1ab1d1e 100755
--- a/Wrappers/Python/ccpi/framework/framework.py
+++ b/Wrappers/Python/ccpi/framework/framework.py
@@ -26,6 +26,26 @@ from datetime import timedelta, datetime
 import warnings
 from functools import reduce
 from numbers import Number
+import ctypes, platform
+
+# dll = os.path.abspath(os.path.join( 
+#          os.path.abspath(os.path.dirname(__file__)),
+#          'libfdiff.dll')
+# )
+
+# check for the extension
+if platform.system() == 'Linux':
+    dll = 'libcilacc.so'
+elif platform.system() == 'Windows':
+    dll = 'cilacc.dll'
+elif platform.system() == 'Darwin':
+    dll = 'libcilacc.dylib'
+else:
+    raise ValueError('Not supported platform, ', platform.system())
+
+#print ("dll location", dll)
+cilacc = ctypes.cdll.LoadLibrary(dll)
+
 
 def find_key(dic, val):
     """return the key of dictionary dic given the value"""
@@ -802,7 +822,69 @@ class DataContainer(object):
     def minimum(self,x2, out=None, *args, **kwargs):
         return self.pixel_wise_binary(numpy.minimum, x2=x2, out=out, *args, **kwargs)
 
-    
+    @staticmethod
+    def axpby(a,x,b,y,out,dtype=numpy.float32):
+        '''performs axpby with cilacc C library
+        
+        Does the operation .. math:: a*x+b*y and stores the result in out
+
+        :param a: scalar
+        :param x: DataContainer
+        :param b: scalar
+        :param y: DataContainer
+        :param out: DataContainer to store the result
+        :param dtype: optional, data type of the DataContainers
+        '''
+
+        c_float_p = ctypes.POINTER(ctypes.c_float)
+        c_double_p = ctypes.POINTER(ctypes.c_double)
+        # get the reference to the data
+        ndx = x.as_array()
+        ndy = y.as_array()
+        ndout = out.as_array()
+
+        if ndx.dtype != dtype:
+            ndx = ndx.astype(dtype)
+        if ndy.dtype != dtype:
+            ndy = ndy.astype(dtype)
+        
+        if dtype == numpy.float32:
+            x_p = ndx.ctypes.data_as(c_float_p)
+            y_p = ndy.ctypes.data_as(c_float_p)
+            out_p = ndout.ctypes.data_as(c_float_p)
+            f = cilacc.saxpby
+
+        elif dtype == numpy.float64:
+            ndx = ndx.astype(numpy.float64)
+            b = b.astype(numpy.float64)
+            x_p = ndx.ctypes.data_as(c_double_p)
+            y_p = ndy.ctypes.data_as(c_double_p)
+            out_p = ndout.ctypes.data_as(c_double_p)
+            f = cilacc.daxpby
+        else:
+            raise TypeError('Unsupported type {}. Expecting numpy.float32 or numpy.float64'.format(dtype))
+
+        #out = numpy.empty_like(a)
+
+        
+        # int psaxpby(float * x, float * y, float * out, float a, float b, long size)
+        cilacc.saxpby.argtypes = [ctypes.POINTER(ctypes.c_float),  # pointer to the first array 
+                                  ctypes.POINTER(ctypes.c_float),  # pointer to the second array 
+                                  ctypes.POINTER(ctypes.c_float),  # pointer to the third array 
+                                  ctypes.c_float,                  # type of A (float)
+                                  ctypes.c_float,                  # type of B (float)
+                                  ctypes.c_long]                   # type of size of first array 
+        cilacc.daxpby.argtypes = [ctypes.POINTER(ctypes.c_double), # pointer to the first array 
+                                  ctypes.POINTER(ctypes.c_double), # pointer to the second array 
+                                  ctypes.POINTER(ctypes.c_double), # pointer to the third array 
+                                  ctypes.c_double,                 # type of A (c_double)
+                                  ctypes.c_double,                 # type of B (c_double)
+                                  ctypes.c_long]                   # type of size of first array 
+
+        if f(x_p, y_p, out_p, a, b, ndx.size) != 0:
+            raise RuntimeError('axpby execution failed')
+        
+
     ## unary operations
     def pixel_wise_unary(self, pwop, *args,  **kwargs):
         out = kwargs.get('out', None)
diff --git a/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py b/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
index 3c32a93..8e07802 100644
--- a/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
+++ b/Wrappers/Python/ccpi/optimisation/operators/GradientOperator.py
@@ -21,15 +21,37 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 from ccpi.optimisation.operators import Operator, LinearOperator, ScaledOperator
+from ccpi.optimisation.operators import FiniteDiff, SparseFiniteDiff
 from ccpi.framework import ImageData, ImageGeometry, BlockGeometry, BlockDataContainer
 import numpy 
-from ccpi.optimisation.operators import FiniteDiff, SparseFiniteDiff
+import warnings
 
-#%%
+NEUMANN = 'Neumann'
+PERIODIC = 'Periodic'
+C = 'c'
+NUMPY = 'numpy'
+CORRELATION_SPACE = "Space"
+CORRELATION_SPACECHANNEL = "SpaceChannels"
 
 class Gradient(LinearOperator):
+
+    """This is a class to compute the first-order forward/backward differences on ImageData
     
+    :param gm_domain: Set up the domain of the function
+    :type gm_domain: `ImageGeometry`
+    :param bnd_cond: Set the boundary conditions to use 'Neumann' or 'Periodic', defaults to 'Neumann'
+    :type bnd_cond: str, optional    
     
+    :param \**kwargs:
+        See below
+
+    :Keyword Arguments:
+        * *correlation* (``str``) --
+          'Space' or 'SpaceChannels', defaults to 'Space'
+        * *backend* (``str``) --
+          'c' or 'numpy', defaults to 'c' if correlation is 'SpaceChannels' or channels = 1
+    """
+
     r'''Gradient Operator: .. math:: \nabla : X -> Y           
             
             Computes first-order forward/backward differences 
@@ -39,29 +61,82 @@ class Gradient(LinearOperator):
                 Example (2D): u\in X, \nabla(u) = [\partial_{y} u, \partial_{x} u]
                               u^{*}\in Y, \nabla^{*}(u^{*}) = \partial_{y} v1 + \partial_{x} v2
         
-                Grad_order = ['channels', 'direction_z', 'direction_y', 'direction_x']
-                Grad_order = ['channels', 'direction_y', 'direction_x']
-                Grad_order = ['direction_z', 'direction_y', 'direction_x']
-                Grad_order = ['channels', 'direction_z', 'direction_y', 'direction_x']
-                
-
     '''
+
+    #kept here for backwards compatability
+    CORRELATION_SPACE = CORRELATION_SPACE
+    CORRELATION_SPACECHANNEL = CORRELATION_SPACECHANNEL
+
+    def __init__(self, gm_domain, bnd_cond = 'Neumann', **kwargs):
+        """Constructor method
+        """        
+        super(Gradient, self).__init__() 
+
+        backend = kwargs.get('backend',C)
+
+        correlation = kwargs.get('correlation',CORRELATION_SPACE)
+
+        if correlation == CORRELATION_SPACE and gm_domain.channels > 1:
+            #numpy implementation only for now
+            backend = NUMPY
+            warnings.warn("Warning: correlation='Space' on multi-channel dataset will use `numpy` backend")
+
+        if backend == NUMPY:
+            self.operator = Gradient_numpy(gm_domain, bnd_cond=bnd_cond, **kwargs)
+        else:
+            self.operator = Gradient_C(gm_domain, bnd_cond=bnd_cond)
+
+
+    def direct(self, x, out=None):
+        """Computes the first-order forward differences
+
+        :param x: Image data
+        :type x: `ImageData`
+        :param out: pre-allocated output memory to store result
+        :type out: `BlockDataContainer`, optional        
+        :return: result data if not passed as parameter
+        :rtype: `BlockDataContainer`
+        """        
+        return self.operator.direct(x, out=out)
+        
+        
+    def adjoint(self, x, out=None):
+        """Computes the first-order backward differences
+
+        :param x: Gradient images for each dimension in ImageGeometry domain
+        :type x: `BlockDataContainer`
+        :param out: pre-allocated output memory to store result
+        :type out: `ImageData`, optional      
+        :return: result data if not passed as parameter
+        :rtype: `ImageData`
+        """            
+        return self.operator.adjoint(x, out=out)
+
+    def domain_geometry(self):
+        '''Returns domain_geometry of Gradient'''
+        
+        return self.operator.gm_domain
     
-                    
-    CORRELATION_SPACE = "Space"
-    CORRELATION_SPACECHANNEL = "SpaceChannels"
+    def range_geometry(self):
+        
+        '''Returns range_geometry of Gradient'''
+        
+        return self.operator.gm_range
 
+class Gradient_numpy(LinearOperator):
+    
     def __init__(self, gm_domain, bnd_cond = 'Neumann', **kwargs):
         
-        super(Gradient, self).__init__() 
+        super(Gradient_numpy, self).__init__() 
                 
         self.gm_domain = gm_domain # Domain of Grad Operator
         
-        self.correlation = kwargs.get('correlation',Gradient.CORRELATION_SPACE)
+        self.correlation = kwargs.get('correlation',CORRELATION_SPACE)
         
-        if self.correlation==Gradient.CORRELATION_SPACE:
-            if self.gm_domain.channels>1:
+        if self.correlation==CORRELATION_SPACE:
+            if self.gm_domain.channels > 1:
                 self.gm_range = BlockGeometry(*[self.gm_domain for _ in range(self.gm_domain.length-1)] )
+
                 if self.gm_domain.length == 4:
                     # 3D + Channel
                     # expected Grad_order = ['channels', 'direction_z', 'direction_y', 'direction_x']
@@ -70,6 +145,7 @@ class Gradient(LinearOperator):
                     # 2D + Channel
                     # expected Grad_order = ['channels', 'direction_y', 'direction_x']
                     expected_order = [ImageGeometry.CHANNEL, ImageGeometry.HORIZONTAL_Y, ImageGeometry.HORIZONTAL_X]
+
                 order = self.gm_domain.get_order_by_label(self.gm_domain.dimension_labels, expected_order)
                 self.ind = order[1:]
                 #self.ind = numpy.arange(1,self.gm_domain.length)
@@ -83,10 +159,12 @@ class Gradient(LinearOperator):
                 else:
                     # 2D
                     expected_order = [ImageGeometry.HORIZONTAL_Y, ImageGeometry.HORIZONTAL_X]    
+
                 self.ind = self.gm_domain.get_order_by_label(self.gm_domain.dimension_labels, expected_order)
                 # self.ind = numpy.arange(self.gm_domain.length)
-        elif self.correlation==Gradient.CORRELATION_SPACECHANNEL:
-            if self.gm_domain.channels>1:
+                
+        elif self.correlation==CORRELATION_SPACECHANNEL:
+            if self.gm_domain.channels > 1:
                 self.gm_range = BlockGeometry(*[self.gm_domain for _ in range(self.gm_domain.length)])
                 self.ind = range(self.gm_domain.length)
             else:
@@ -189,6 +267,146 @@ class Gradient(LinearOperator):
             res.append(spMat.sum_abs_col())
         return BlockDataContainer(*res)
    
+import ctypes, platform
+
+# check for the extension
+if platform.system() == 'Linux':
+    dll = 'libcilacc.so'
+elif platform.system() == 'Windows':
+    dll = 'cilacc.dll'
+elif platform.system() == 'Darwin':
+    dll = 'libcilacc.dylib'
+else:
+    raise ValueError('Not supported platform, ', platform.system())
+
+#print ("dll location", dll)
+cilacc = ctypes.cdll.LoadLibrary(dll)
+
+#FD = ctypes.CDLL(dll)
+
+c_float_p = ctypes.POINTER(ctypes.c_float)
+
+cilacc.openMPtest.restypes = ctypes.c_int32
+
+cilacc.fdiff4D.argtypes = [ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.c_long,
+                       ctypes.c_long,
+                       ctypes.c_long,
+                       ctypes.c_long,
+                       ctypes.c_int32,
+                       ctypes.c_int32]
+
+cilacc.fdiff3D.argtypes = [ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.c_long,
+                       ctypes.c_long,
+                       ctypes.c_long,
+                       ctypes.c_int32,
+                       ctypes.c_int32]
+
+cilacc.fdiff2D.argtypes = [ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.POINTER(ctypes.c_float),
+                       ctypes.c_long,
+                       ctypes.c_long,
+                       ctypes.c_int32,
+                       ctypes.c_int32]
+
+
+class Gradient_C(LinearOperator):
+    
+    '''Finite Difference Operator:
+            
+            Computes first-order forward/backward differences 
+                     on 2D, 3D, 4D ImageData
+                     under Neumann/Periodic boundary conditions'''
+
+    def __init__(self, gm_domain, gm_range=None, bnd_cond = NEUMANN):
+
+        super(Gradient_C, self).__init__() 
+
+        self.gm_domain = gm_domain
+        self.gm_range = gm_range
+        
+        #default is 'Neumann'
+        self.bnd_cond = 0
+        
+        if bnd_cond == PERIODIC:
+            self.bnd_cond = 1
+        
+        # Domain Geometry = Range Geometry if not stated
+        if self.gm_range is None:
+            self.gm_range = BlockGeometry(*[gm_domain for _ in range(len(gm_domain.shape))])
+        
+
+        if len(gm_domain.shape) == 4:
+            self.fd = cilacc.fdiff4D
+        elif len(gm_domain.shape) == 3:
+            self.fd = cilacc.fdiff3D
+        elif len(gm_domain.shape) == 2:
+            self.fd = cilacc.fdiff2D
+        else:
+            raise ValueError('Number of dimensions not supported, expected 2, 3 or 4, got {}'.format(len(gm_domain.shape)))
+        
+                        
+    @staticmethod 
+    def datacontainer_as_c_pointer(x):
+        ndx = x.as_array()
+        return ndx, ndx.ctypes.data_as(c_float_p)
+        
+    def direct(self, x, out=None):
+        ndx , x_p = Gradient_C.datacontainer_as_c_pointer(x)
+        
+        return_val = False
+        if out is None:
+            out = self.gm_range.allocate(None)
+            return_val = True
+
+        arg1 = [Gradient_C.datacontainer_as_c_pointer(out.get_item(i))[1] for i in range(self.gm_range.shape[0])]
+        arg2 = [el for el in x.shape]
+        args = arg1 + arg2 + [self.bnd_cond, 1]
+        self.fd(x_p, *args)
+        
+        if return_val is True:
+            return out
+
+
+    def adjoint(self, x, out=None):
+
+        return_val = False
+        if out is None:
+            out = self.gm_domain.allocate(None)
+            return_val = True
+
+        ndout, out_p = Gradient_C.datacontainer_as_c_pointer(out)
+
+        arg1 = [Gradient_C.datacontainer_as_c_pointer(x.get_item(i))[1] for i in range(self.gm_range.shape[0])]
+        arg2 = [el for el in out.shape]
+        args = arg1 + arg2 + [self.bnd_cond, 0]
+
+        self.fd(out_p, *args)
+
+        if return_val is True:
+            return out
+
+    def domain_geometry(self):
+        
+        '''Returns domain_geometry of Gradient'''
+        
+        return self.gm_domain
+    
+    def range_geometry(self):
+        
+        '''Returns range_geometry of Gradient'''
+        
+        return self.gm_range
+
        
 if __name__ == '__main__':
     
@@ -302,8 +520,5 @@ if __name__ == '__main__':
     G.direct(u, out = res)          
     G.adjoint(w, out = res1)
           
-    print( (res*w).sum(),  (u*res1).sum() )     
-    
+    print( (res*w).sum(),  (u*res1).sum() )
     
-    
-
diff --git a/Wrappers/Python/conda-recipe/bld.bat b/Wrappers/Python/conda-recipe/bld.bat
deleted file mode 100644
index 97a4e62..0000000
--- a/Wrappers/Python/conda-recipe/bld.bat
+++ /dev/null
@@ -1,10 +0,0 @@
-
-IF NOT DEFINED CIL_VERSION (
-ECHO CIL_VERSION Not Defined.
-exit 1
-)
-ROBOCOPY /E "%RECIPE_DIR%\.." "%SRC_DIR%"
-
-%PYTHON% setup.py build_py
-%PYTHON% setup.py install
-if errorlevel 1 exit 1
diff --git a/Wrappers/Python/conda-recipe/build.sh b/Wrappers/Python/conda-recipe/build.sh
deleted file mode 100644
index 43e85d5..0000000
--- a/Wrappers/Python/conda-recipe/build.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-if [ -z "$CIL_VERSION" ]; then
-    echo "Need to set CIL_VERSION"
-    exit 1
-fi
-mkdir ${SRC_DIR}/ccpi
-cp -r "${RECIPE_DIR}/../../../" ${SRC_DIR}/ccpi
-
-cd ${SRC_DIR}/ccpi/Wrappers/Python
-$PYTHON setup.py install
diff --git a/Wrappers/Python/conda-recipe/conda_build_config.yaml b/Wrappers/Python/conda-recipe/conda_build_config.yaml
deleted file mode 100644
index e5e168b..0000000
--- a/Wrappers/Python/conda-recipe/conda_build_config.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-python:
-  - 2.7 # [not win]
-  - 3.5
-  - 3.6
-numpy:
-  # TODO investigage, as it doesn't currently build with cvxp, requires >1.14
-  - 1.11
-  - 1.12
-  - 1.14
-  - 1.15
diff --git a/Wrappers/Python/conda-recipe/meta.yaml b/Wrappers/Python/conda-recipe/meta.yaml
deleted file mode 100644
index 9d03220..0000000
--- a/Wrappers/Python/conda-recipe/meta.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-package:
-  name: ccpi-framework
-  version: {{ environ['CIL_VERSION'] }}
-
-build:
-  preserve_egg_dir: False
-  script_env:
-    - CIL_VERSION   
-  #number: 0  
-  
-test:
-  requires:
-    - python-wget
-    - cvxpy # [ unix and py36 and np115 ]
-
-  source_files:
-    - ./test # [win]
-    - ./ccpi/Wrappers/Python/test # [not win] 
-
-  commands:
-    - python -c "import os; print (os.getcwd())"
-    - python -m unittest discover # [win]
-    - python -m unittest discover -s ccpi/Wrappers/Python/test # [not win]
-    
-requirements:
-  build:
-    - {{ pin_compatible('numpy', max_pin='x.x') }}
-    - python
-    - setuptools
-
-  run:
-    - {{ pin_compatible('numpy', max_pin='x.x') }}
-    - python
-    - numpy
-    - scipy
-    - matplotlib
-    - h5py
-    - pillow
-  
-about:
-  home: http://www.ccpi.ac.uk
-  license:  Apache 2.0 License
-  summary: 'CCPi Framework'
diff --git a/Wrappers/Python/recipe/bld.bat b/Wrappers/Python/recipe/bld.bat
new file mode 100644
index 0000000..97a4e62
--- /dev/null
+++ b/Wrappers/Python/recipe/bld.bat
@@ -0,0 +1,10 @@
+
+IF NOT DEFINED CIL_VERSION (
+ECHO CIL_VERSION Not Defined.
+exit 1
+)
+ROBOCOPY /E "%RECIPE_DIR%\.." "%SRC_DIR%"
+
+%PYTHON% setup.py build_py
+%PYTHON% setup.py install
+if errorlevel 1 exit 1
diff --git a/Wrappers/Python/recipe/build.sh b/Wrappers/Python/recipe/build.sh
new file mode 100644
index 0000000..43e85d5
--- /dev/null
+++ b/Wrappers/Python/recipe/build.sh
@@ -0,0 +1,9 @@
+if [ -z "$CIL_VERSION" ]; then
+    echo "Need to set CIL_VERSION"
+    exit 1
+fi
+mkdir ${SRC_DIR}/ccpi
+cp -r "${RECIPE_DIR}/../../../" ${SRC_DIR}/ccpi
+
+cd ${SRC_DIR}/ccpi/Wrappers/Python
+$PYTHON setup.py install
diff --git a/Wrappers/Python/recipe/conda_build_config.yaml b/Wrappers/Python/recipe/conda_build_config.yaml
new file mode 100644
index 0000000..e5e168b
--- /dev/null
+++ b/Wrappers/Python/recipe/conda_build_config.yaml
@@ -0,0 +1,10 @@
+python:
+  - 2.7 # [not win]
+  - 3.5
+  - 3.6
+numpy:
+  # TODO investigage, as it doesn't currently build with cvxp, requires >1.14
+  - 1.11
+  - 1.12
+  - 1.14
+  - 1.15
diff --git a/Wrappers/Python/recipe/meta.yaml b/Wrappers/Python/recipe/meta.yaml
new file mode 100644
index 0000000..9d03220
--- /dev/null
+++ b/Wrappers/Python/recipe/meta.yaml
@@ -0,0 +1,43 @@
+package:
+  name: ccpi-framework
+  version: {{ environ['CIL_VERSION'] }}
+
+build:
+  preserve_egg_dir: False
+  script_env:
+    - CIL_VERSION   
+  #number: 0  
+  
+test:
+  requires:
+    - python-wget
+    - cvxpy # [ unix and py36 and np115 ]
+
+  source_files:
+    - ./test # [win]
+    - ./ccpi/Wrappers/Python/test # [not win] 
+
+  commands:
+    - python -c "import os; print (os.getcwd())"
+    - python -m unittest discover # [win]
+    - python -m unittest discover -s ccpi/Wrappers/Python/test # [not win]
+    
+requirements:
+  build:
+    - {{ pin_compatible('numpy', max_pin='x.x') }}
+    - python
+    - setuptools
+
+  run:
+    - {{ pin_compatible('numpy', max_pin='x.x') }}
+    - python
+    - numpy
+    - scipy
+    - matplotlib
+    - h5py
+    - pillow
+  
+about:
+  home: http://www.ccpi.ac.uk
+  license:  Apache 2.0 License
+  summary: 'CCPi Framework'
diff --git a/Wrappers/Python/setup-cil.py.in b/Wrappers/Python/setup-cil.py.in
new file mode 100644
index 0000000..d1cf7fc
--- /dev/null
+++ b/Wrappers/Python/setup-cil.py.in
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+#  CCP in Tomographic Imaging (CCPi) Core Imaging Library (CIL).
+
+#   Copyright 2017 UKRI-STFC
+#   Copyright 2017 University of Manchester
+
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+from distutils.core import setup
+import os
+import sys
+
+
+cil_version=os.environ['CIL_VERSION']
+if  cil_version == '':
+    print("Please set the environmental variable CIL_VERSION")
+    sys.exit(1)
+
+setup(
+    name="ccpi-framework",
+    version=cil_version,
+    packages=['ccpi' , 'ccpi.io',
+              'ccpi.framework', 'ccpi.optimisation', 
+              'ccpi.optimisation.operators',
+              'ccpi.optimisation.algorithms',
+              'ccpi.optimisation.functions',
+              'ccpi.processors',
+              'ccpi.utilities', 'ccpi.utilities.jupyter',
+              'ccpi.contrib','ccpi.contrib.optimisation',
+              'ccpi.contrib.optimisation.algorithms'],
+    data_files = [('share/ccpi', ['data/boat.tiff',
+                                  'data/peppers.tiff',
+                                  'data/camera.png',
+                                  'data/resolution_chart.tiff',
+                                  'data/shapes.png',
+                                  'data/24737_fd_normalised.nxs'])],
+
+    # Project uses reStructuredText, so ensure that the docutils get
+    # installed or upgraded on the target machine
+    #install_requires=['docutils>=0.3'],
+
+#    package_data={
+#        # If any package contains *.txt or *.rst files, include them:
+#        '': ['*.txt', '*.rst'],
+#        # And include any *.msg files found in the 'hello' package, too:
+#        'hello': ['*.msg'],
+#    },
+    # zip_safe = False,
+
+    # metadata for upload to PyPI
+    author="CCPi developers",
+    maintainer="Edoardo Pasca",
+    maintainer_email="edoardo.pasca@stfc.ac.uk",
+    description='CCPi Core Imaging Library - Python Framework Module',
+    license="Apache v2.0",
+    keywords="Python Framework",
+    url="http://www.ccpi.ac.uk/CIL",   # project home page, if any
+
+    # could also include long_description, download_url, classifiers, etc.
+)
diff --git a/Wrappers/Python/test/test_DataContainer.py b/Wrappers/Python/test/test_DataContainer.py
index 9b9a8dd..d2d5b05 100755
--- a/Wrappers/Python/test/test_DataContainer.py
+++ b/Wrappers/Python/test/test_DataContainer.py
@@ -732,6 +732,19 @@ class TestDataContainer(unittest.TestCase):
         u.multiply(2, out=u)
         c = b * 2
         numpy.testing.assert_array_equal(u.as_array(), c)
+
+    def test_axpby(self):
+        print ("test axpby")
+        ig = ImageGeometry(10,10)                                               
+        d1 = ig.allocate(1)                                                     
+        d2 = ig.allocate(2)                                                     
+        out = ig.allocate(None)                                                 
+        # equals to 2 * [1] + 1 * [2] = [4]
+        DataContainer.axpby(2,d1,1,d2,out)
+        res = numpy.ones_like(d1.as_array()) * 4.
+        numpy.testing.assert_array_equal(res, out.as_array())
+
+
         
 
 
diff --git a/Wrappers/Python/test/test_Gradient.py b/Wrappers/Python/test/test_Gradient.py
index 5dc8137..5aeede0 100755
--- a/Wrappers/Python/test/test_Gradient.py
+++ b/Wrappers/Python/test/test_Gradient.py
@@ -23,6 +23,7 @@ from ccpi.framework import BlockDataContainer
 import functools
 
 from ccpi.optimisation.operators import Gradient, Identity, BlockOperator
+from ccpi.optimisation.operators import LinearOperator
 
 class TestGradient(unittest.TestCase):
     def test_Gradient(self): 
@@ -36,17 +37,17 @@ class TestGradient(unittest.TestCase):
         ig3 = ImageGeometry(voxel_num_x = M, voxel_num_y = N, channels = channels) 
         ig4 = ImageGeometry(voxel_num_x = M, voxel_num_y = N, channels = channels, voxel_num_z= K) 
         
-        G1 = Gradient(ig1, correlation = 'Space') 
+        G1 = Gradient(ig1, correlation = 'Space', backend='numpy') 
         print(G1.range_geometry().shape, '2D no channels')
             
-        G4 = Gradient(ig3, correlation = 'SpaceChannels')
-        print(G4.range_geometry().shape, '2D with channels corr')
-        G5 = Gradient(ig3, correlation = 'Space')
+        G4 = Gradient(ig3, correlation = 'SpaceChannels', backend='numpy')
+        print(G4.range_geometry().shape, '2D with channels corr')  
+        G5 = Gradient(ig3, correlation = 'Space', backend='numpy')
         print(G5.range_geometry().shape, '2D with channels no corr')
         
-        G6 = Gradient(ig4, correlation = 'Space')
+        G6 = Gradient(ig4, correlation = 'Space', backend='numpy')
         print(G6.range_geometry().shape, '3D with channels no corr')
-        G7 = Gradient(ig4, correlation = 'SpaceChannels')
+        G7 = Gradient(ig4, correlation = 'SpaceChannels', backend='numpy')
         print(G7.range_geometry().shape, '3D with channels with corr')
         
         
@@ -77,8 +78,8 @@ class TestGradient(unittest.TestCase):
         #check direct/adjoint for space/channels correlation
         
         ig_channel = ImageGeometry(voxel_num_x = 2, voxel_num_y = 3, channels = 2)
-        G_no_channel = Gradient(ig_channel, correlation = 'Space')
-        G_channel = Gradient(ig_channel, correlation = 'SpaceChannels')
+        G_no_channel = Gradient(ig_channel, correlation = 'Space', backend='numpy')
+        G_channel = Gradient(ig_channel, correlation = 'SpaceChannels', backend='numpy')
         
         u3 = ig_channel.allocate('random_int')
         res_no_channel = G_no_channel.direct(u3)
@@ -95,7 +96,7 @@ class TestGradient(unittest.TestCase):
         
         ig2D = ImageGeometry(voxel_num_x = 2, voxel_num_y = 3)
         u4 = ig2D.allocate('random_int')
-        G2D = Gradient(ig2D)
+        G2D = Gradient(ig2D, backend='numpy')
         res = G2D.direct(u4)  
         print(res[0].as_array())
         print(res[1].as_array())
@@ -105,12 +106,133 @@ class TestGradient(unittest.TestCase):
         arr = ig.allocate('random_int' )
         
         # check direct of Gradient and sparse matrix
-        G = Gradient(ig)
+        G = Gradient(ig, backend='numpy')
         norm1 = G.norm(iterations=300)
         print ("should be sqrt(8) {} {}".format(numpy.sqrt(8), norm1))
         numpy.testing.assert_almost_equal(norm1, numpy.sqrt(8), decimal=1)
         ig4 = ImageGeometry(M,N, channels=3)
-        G4 = Gradient(ig4, correlation=Gradient.CORRELATION_SPACECHANNEL)
+        G4 = Gradient(ig4, correlation="SpaceChannels", backend='numpy')
         norm4 = G4.norm(iterations=300)
-        print ("should be sqrt(12) {} {}".format(numpy.sqrt(12), norm4))
+        print("should be sqrt(12) {} {}".format(numpy.sqrt(12), norm4))
         self.assertTrue((norm4 - numpy.sqrt(12))/norm4 < 0.2)
+
+    def test_GradientOperator_4D(self):
+
+        nc, nz, ny, nx = 3, 4, 5, 6
+        size = nc * nz * ny * nx
+        dim = [nc, nz, ny, nx]
+
+        ig = ImageGeometry(voxel_num_x=nx, voxel_num_y=ny, voxel_num_z=nz, channels=nc)
+
+        arr = numpy.arange(size).reshape(dim).astype(numpy.float32)**2
+
+        data = ig.allocate()
+        data.fill(arr)
+
+        #neumann
+        grad_py = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='numpy')
+        gold_direct = grad_py.direct(data)
+        gold_adjoint = grad_py.adjoint(gold_direct)
+
+        grad_c = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='c')
+        out_direct = grad_c.direct(data)
+        out_adjoint = grad_c.adjoint(out_direct)
+
+        #print("GradientOperator, 4D, bnd_cond='Neumann', direct")
+        numpy.testing.assert_array_equal(out_direct.get_item(0).as_array(), gold_direct.get_item(0).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(1).as_array(), gold_direct.get_item(1).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(2).as_array(), gold_direct.get_item(2).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(3).as_array(), gold_direct.get_item(3).as_array())
+
+        #print("GradientOperator, 4D, bnd_cond='Neumann', adjoint")
+        numpy.testing.assert_array_equal(out_adjoint.as_array(), gold_adjoint.as_array())
+
+        #periodic
+        grad_py = Gradient(ig, bnd_cond='Periodic', correlation='SpaceChannels', backend='numpy')
+        gold_direct = grad_py.direct(data)
+        gold_adjoint = grad_py.adjoint(gold_direct)
+
+        grad_c = Gradient(ig, bnd_cond='Periodic', correlation='SpaceChannels', backend='c')
+        out_direct = grad_c.direct(data)
+        out_adjoint = grad_c.adjoint(out_direct)
+
+        #print("Gradient, 4D, bnd_cond='Periodic', direct")
+        numpy.testing.assert_array_equal(out_direct.get_item(0).as_array(), gold_direct.get_item(0).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(1).as_array(), gold_direct.get_item(1).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(2).as_array(), gold_direct.get_item(2).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(3).as_array(), gold_direct.get_item(3).as_array())
+
+        #print("Gradient, 4D, bnd_cond='Periodic', adjoint")
+        numpy.testing.assert_array_equal(out_adjoint.as_array(), gold_adjoint.as_array())
+
+    def test_Gradient_4D_allocate(self):
+
+        nc, nz, ny, nx = 3, 4, 5, 6
+        size = nc * nz * ny * nx
+        dim = [nc, nz, ny, nx]
+
+        ig = ImageGeometry(voxel_num_x=nx, voxel_num_y=ny, voxel_num_z=nz, channels=nc)
+
+
+        arr = numpy.arange(size).reshape(dim).astype(numpy.float32)**2
+
+        data = ig.allocate()
+        data.fill(arr)
+
+        #numpy
+        grad1 = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='numpy')
+        gold_direct = grad1.direct(data)
+        gold_adjoint = grad1.adjoint(gold_direct)
+
+        grad2 = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='numpy')
+        out_direct = grad2.range_geometry().allocate()
+        out_adjoint = grad2.domain_geometry().allocate()
+        grad2.direct(data, out=out_direct)
+        grad2.adjoint(out_direct, out=out_adjoint)
+
+        #print("GradientOperator, 4D, bnd_cond='Neumann', direct")
+        numpy.testing.assert_array_equal(out_direct.get_item(0).as_array(), gold_direct.get_item(0).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(1).as_array(), gold_direct.get_item(1).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(2).as_array(), gold_direct.get_item(2).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(3).as_array(), gold_direct.get_item(3).as_array())
+
+        #print("GradientOperator, 4D, bnd_cond='Neumann', adjoint")
+        numpy.testing.assert_array_equal(out_adjoint.as_array(), gold_adjoint.as_array())
+
+        #c
+        grad1 = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='c')
+        gold_direct = grad1.direct(data)
+        gold_adjoint = grad1.adjoint(gold_direct)
+
+        grad2 = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='c')
+        out_direct = grad2.range_geometry().allocate()
+        out_adjoint = grad2.domain_geometry().allocate()
+        grad2.direct(data, out=out_direct)
+        grad2.adjoint(out_direct, out=out_adjoint)
+
+        #print("GradientOperator, 4D, bnd_cond='Neumann', direct")
+        numpy.testing.assert_array_equal(out_direct.get_item(0).as_array(), gold_direct.get_item(0).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(1).as_array(), gold_direct.get_item(1).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(2).as_array(), gold_direct.get_item(2).as_array())
+        numpy.testing.assert_array_equal(out_direct.get_item(3).as_array(), gold_direct.get_item(3).as_array())
+
+        #print("GradientOperator, 4D, bnd_cond='Neumann', adjoint")
+        numpy.testing.assert_array_equal(out_adjoint.as_array(), gold_adjoint.as_array())
+
+    def test_Gradient_linearity(self):
+
+        nc, nz, ny, nx = 3, 4, 5, 6
+        ig = ImageGeometry(voxel_num_x=nx, voxel_num_y=ny, voxel_num_z=nz, channels=nc)
+
+        grad = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='c')
+        self.assertTrue(LinearOperator.dot_test(grad))
+
+        grad = Gradient(ig, bnd_cond='Periodic', correlation='SpaceChannels', backend='c')
+        self.assertTrue(LinearOperator.dot_test(grad))
+
+        grad = Gradient(ig, bnd_cond='Neumann', correlation='SpaceChannels', backend='numpy')
+        self.assertTrue(LinearOperator.dot_test(grad))
+
+        grad = Gradient(ig, bnd_cond='Periodic', correlation='SpaceChannels', backend='numpy')
+        self.assertTrue(LinearOperator.dot_test(grad))
+        
\ No newline at end of file
diff --git a/Wrappers/Python/test/test_Operator.py b/Wrappers/Python/test/test_Operator.py
index 3372b9b..96c48da 100644
--- a/Wrappers/Python/test/test_Operator.py
+++ b/Wrappers/Python/test/test_Operator.py
@@ -494,12 +494,13 @@ class TestBlockOperator(unittest.TestCase):
             print("Z1", Z1[0][1].as_array())
             print("RES1", RES1[0][1].as_array())
     def test_timedifference(self):
+
         print ("test_timedifference")
         M, N ,W = 100, 512, 512
         ig = ImageGeometry(M, N, W)
         arr = ig.allocate('random_int')  
         
-        G = Gradient(ig)
+        G = Gradient(ig, backend='numpy')
         Id = Identity(ig)
         
         B = BlockOperator(G, Id)
diff --git a/build/jenkins-build.sh b/build/jenkins-build.sh
deleted file mode 100644
index 009d43d..0000000
--- a/build/jenkins-build.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-export CCPI_BUILD_ARGS="-c conda-forge -c ccpi"
-bash <(curl -L https://raw.githubusercontent.com/vais-ral/CCPi-VirtualMachine/master/scripts/jenkins-build.sh)
diff --git a/recipe/bld.bat b/recipe/bld.bat
new file mode 100644
index 0000000..af5ca40
--- /dev/null
+++ b/recipe/bld.bat
@@ -0,0 +1,15 @@
+
+IF NOT DEFINED CIL_VERSION (
+ECHO CIL_VERSION Not Defined.
+exit 1
+)
+
+ROBOCOPY /E "%RECIPE_DIR%\.." "%SRC_DIR%" /XD .git /XD Wrappers\Python\build
+
+mkdir "%SRC_DIR%\build_framework"
+
+cd "%SRC_DIR%\build_framework"
+cmake -G "NMake Makefiles" %RECIPE_DIR%\..\ -DCONDA_BUILD=ON -DCMAKE_BUILD_TYPE="Release" -DLIBRARY_LIB=%CONDA_PREFIX%\lib -DLIBRARY_INC=%CONDA_PREFIX% -DCMAKE_INSTALL_PREFIX=%PREFIX%
+
+nmake install
+if errorlevel 1 exit 1
diff --git a/recipe/build.sh b/recipe/build.sh
new file mode 100644
index 0000000..22cfae8
--- /dev/null
+++ b/recipe/build.sh
@@ -0,0 +1,24 @@
+if [ -z "$CIL_VERSION" ]; then
+    echo "Need to set CIL_VERSION"
+    exit 1
+fi
+# mkdir ${SRC_DIR}/ccpi
+mkdir -p ${SRC_DIR}/ccpi/Wrappers/Python
+cp -r "${RECIPE_DIR}/../Wrappers/Python/test" ${SRC_DIR}/ccpi/Wrappers/Python
+
+# cd ${SRC_DIR}/ccpi/Wrappers/Python
+# $PYTHON setup.py install
+
+
+mkdir ${SRC_DIR}/build_framework
+#cp -r "${RECIPE_DIR}/../" ${SRC_DIR}/build_framework
+
+cd ${SRC_DIR}/build_framework
+cmake ${RECIPE_DIR}/../ -DCONDA_BUILD=ON \
+                        -DCMAKE_BUILD_TYPE="Release"\
+                        -DLIBRARY_LIB=$CONDA_PREFIX/lib \
+                        -DLIBRARY_INC=$CONDA_PREFIX \
+                        -DCMAKE_INSTALL_PREFIX=$PREFIX
+
+make install
+# $PYTHON setup.py install
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
new file mode 100644
index 0000000..e5e168b
--- /dev/null
+++ b/recipe/conda_build_config.yaml
@@ -0,0 +1,10 @@
+python:
+  - 2.7 # [not win]
+  - 3.5
+  - 3.6
+numpy:
+  # TODO investigage, as it doesn't currently build with cvxp, requires >1.14
+  - 1.11
+  - 1.12
+  - 1.14
+  - 1.15
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
new file mode 100644
index 0000000..fee8e87
--- /dev/null
+++ b/recipe/meta.yaml
@@ -0,0 +1,45 @@
+package:
+  name: ccpi-framework
+  version: {{ environ['CIL_VERSION'] }}
+
+build:
+  preserve_egg_dir: False
+  script_env:
+    - CIL_VERSION   
+  #number: 0  
+  
+test:
+  requires:
+    - python-wget
+    - cvxpy # [ unix and py36 and np115 ]
+
+  source_files:
+    - ./Wrappers/Python/test # [win]
+    - ./ccpi/Wrappers/Python/test # [not win] 
+
+  commands:
+    - python -c "import os; print ('TESTING IN THIS DIRECTORY' , os.getcwd())"
+    - python -m unittest discover Wrappers/Python/test # [win]
+    - python -m unittest discover -s ccpi/Wrappers/Python/test # [not win]
+    
+requirements:
+  build:
+    - {{ pin_compatible('numpy', max_pin='x.x') }}
+    - python
+    - setuptools
+    - cmake
+
+  run:
+    - {{ pin_compatible('numpy', max_pin='x.x') }}
+    - python
+    - numpy
+    - scipy
+    - matplotlib
+    - h5py
+    - pillow
+    - libgcc-ng # [not win]
+  
+about:
+  home: http://www.ccpi.ac.uk
+  license:  Apache 2.0 License
+  summary: 'CCPi Framework'
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
new file mode 100644
index 0000000..a527289
--- /dev/null
+++ b/src/Core/CMakeLists.txt
@@ -0,0 +1,124 @@
+
+set (CMAKE_C_STANDARD 11)
+
+if(APPLE)
+    if(NOT DEFINED OPENMP_INCLUDES OR NOT DEFINED OPENMP_LIBRARIES)
+        set(OPENMP_INCLUDES "OPENMP_INCLUDES-NOTFOUND" CACHE PATH "Need to set OpenMP include dir on OSX")
+        set(OPENMP_LIBRARIES "OPENMP_LIBRARIES-NOTFOUND" CACHE PATH "Need to set OpenMP lib dir on OSX")
+    endif()
+    if(NOT EXISTS ${OPENMP_INCLUDES} OR NOT EXISTS ${OPENMP_LIBRARIES})
+        message(FATAL_ERROR "Need to set OPENMP_INCLUDES and OPENMP_LIBRARIES on OSX.")
+    endif()
+    if(CMAKE_C_COMPILER_ID MATCHES "Clang")
+        set(OpenMP_C "${CMAKE_C_COMPILER}")
+        set(OpenMP_C_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument")
+        set(OpenMP_C_LIB_NAMES "libomp" "libgomp" "libiomp5")
+        set(OpenMP_libomp_LIBRARY ${OpenMP_C_LIB_NAMES})
+        set(OpenMP_libgomp_LIBRARY ${OpenMP_C_LIB_NAMES})
+        set(OpenMP_libiomp5_LIBRARY ${OpenMP_C_LIB_NAMES})
+    endif()
+    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        set(OpenMP_CXX "${CMAKE_CXX_COMPILER}")
+        set(OpenMP_CXX_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument")
+        set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5")
+        set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
+        set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
+        set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES})
+    endif()
+endif()
+
+
+find_package(OpenMP REQUIRED)
+
+add_definitions(${OpenMP_CXX_FLAGS})
+add_definitions(${OpenMP_C_FLAGS})
+  
+  if (APPLE)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+    set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
+    set (CMAKE_STATIC_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_STATIC_LINKER_FLAGS}")   
+    include_directories("${OPENMP_INCLUDES}")
+    link_directories("${OPENMP_LIBRARIES}")
+  else()
+    if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.9.0") 
+      set (OpenMP_EXE_LINKER_FLAGS OpenMP::OpenMP_CXX)
+      set (OpenMP_EXE_LINKER_FLAGS OpenMP::OpenMP_C)
+    else()
+      message(WARNING "Your CMake version is old. OpenMP linking flags  might be incorrect.")
+      # need to explicitly set this. Definitely for gcc, hopefully also for other systems.
+      # See https://gitlab.kitware.com/cmake/cmake/issues/15392
+      set (OpenMP_EXE_LINKER_FLAGS ${OpenMP_CXX_FLAGS})
+      set (OpenMP_EXE_LINKER_FLAGS ${OpenMP_C_FLAGS})
+    endif()
+  endif()
+
+
+#if (OPENMP_FOUND)
+#    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+#    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+#    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+#   set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+#   set (CMAKE_STATIC_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_STATIC_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}")
+#   if (UNIX)
+#    set (FLAGS "-O3 -funsigned-char -Wall  -Wl,--no-undefined -march=native")  
+#    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
+#    set (CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}")
+#  
+#    set (EXTRA_LIBRARIES 
+#      "gomp"
+#      "m"
+#      )
+#   endif()
+# endif()
+
+if (WIN32)
+	set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ddll_EXPORTS")
+endif()
+
+
+message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}")
+message("CMAKE_C_FLAGS ${CMAKE_C_FLAGS}")
+message("CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}")
+message("CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}")
+message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}")
+
+
+
+add_library(cilacc SHARED ${CMAKE_CURRENT_SOURCE_DIR}/axpby.c
+                          ${CMAKE_CURRENT_SOURCE_DIR}/FiniteDifferenceLibrary.c )
+
+target_link_libraries(cilacc ${OpenMP_C_LIB_NAMES} )
+include_directories(cilacc PUBLIC 
+				  ${CMAKE_CURRENT_SOURCE_DIR}/include
+		          )
+
+## Install
+#include(GNUInstallDirs)
+#install(TARGETS cilacc
+#	RUNTIME DESTINATION bin
+#    LIBRARY DESTINATION lib
+#    ARCHIVE DESTINATION lib
+#	CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
+#	)
+
+if (UNIX)
+message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib")
+install(TARGETS cilacc
+	LIBRARY DESTINATION lib
+	CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
+	)
+elseif(WIN32)
+message ("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin")
+  install(TARGETS cilacc 
+	RUNTIME DESTINATION bin
+	ARCHIVE DESTINATION lib
+	CONFIGURATIONS ${CMAKE_BUILD_TYPE} 
+	)
+endif()
+
+install(DIRECTORY ${PROJECT_SOURCE_DIR}/src/Core/include/
+        DESTINATION ${CMAKE_INSTALL_PREFIX}/include/cil) 
+
+
diff --git a/src/Core/FiniteDifferenceLibrary.c b/src/Core/FiniteDifferenceLibrary.c
new file mode 100644
index 0000000..f5736e2
--- /dev/null
+++ b/src/Core/FiniteDifferenceLibrary.c
@@ -0,0 +1,692 @@
+#define dll_EXPORTS = 1
+
+#include "FiniteDifferenceLibrary.h"
+
+DLL_EXPORT int openMPtest(void)
+{
+	int nThreads;
+#pragma omp parallel
+	{
+		if (omp_get_thread_num() == 0)
+		{
+			nThreads = omp_get_num_threads();
+		}
+	}
+	return nThreads;
+}
+int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+{
+	size_t volume = nx * ny * nz;
+
+	const float * inimage = inimagefull;
+	float * outimageX = outimageXfull;
+	float * outimageY = outimageYfull;
+	float * outimageZ = outimageZfull;
+
+	int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+	int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
+
+	long c;
+	for (c = 0; c < nc; c++)
+	{
+#pragma omp parallel
+		{
+			long ind, k, j, i;
+			float pix0;
+			//run over all and then fix boundaries
+#pragma omp for
+			for (ind = 0; ind < nx * ny * (nz - 1); ind++)
+			{
+				pix0 = -inimage[ind];
+
+				outimageX[ind] = pix0 + inimage[ind + 1];
+				outimageY[ind] = pix0 + inimage[ind + nx];
+				outimageZ[ind] = pix0 + inimage[ind + nx * ny];
+			}
+
+#pragma omp for
+			for (ind = 0; ind < nx * (ny - 1); ind++)
+			{
+				pix0 = -inimage[ind + offset1];
+
+				outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1];
+				outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx];
+			}
+
+#pragma omp for
+			for (ind = 0; ind < nx - 1; ind++)
+			{
+				pix0 = -inimage[ind + offset2];
+
+				outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
+			}
+
+			//boundaries
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (i = 0; i < nx; i++)
+				{
+					outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0;
+				}
+			}
+
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (j = 0; j < ny; j++)
+				{
+					outimageX[k * ny * nx + j * nx + nx - 1] = 0;
+				}
+			}
+
+			if (nz > 1)
+			{
+#pragma omp for
+				for (ind = 0; ind < ny * nx; ind++)
+				{
+					outimageZ[nx * ny * (nz - 1) + ind] = 0;
+				}
+			}
+		}
+
+		inimage += volume;
+		outimageX += volume;
+		outimageY += volume;
+		outimageZ += volume;
+	}
+
+
+	//now the rest of the channels
+	if (nc > 1)
+	{
+		long ind;
+
+		for (c = 0; c < nc - 1; c++)
+		{
+			float * outimageC = outimageCfull + c * volume;
+			const float * inimage = inimagefull + c * volume;
+
+#pragma omp parallel for
+			for (ind = 0; ind < volume; ind++)
+			{
+				outimageC[ind] = -inimage[ind] + inimage[ind + volume];
+			}
+		}
+
+#pragma omp parallel for
+		for (ind = 0; ind < volume; ind++)
+		{
+			outimageCfull[(nc - 1) * volume + ind] = 0;
+		}
+	}
+
+	return 0;
+}
+int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normfull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+{
+	size_t volume = nx * ny * nz;
+	int z_dim = nz - 1;
+
+	const float * inimage = inimagefull;
+	float * outimageX = outimageXfull;
+	float * outimageY = outimageYfull;
+	float * outimageZ = outimageZfull;
+	float * outimageL21norm = outimageL21normfull;
+
+
+	int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+	int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
+
+	long c;
+	for (c = 0; c < nc; c++)
+	{
+#pragma omp parallel
+		{
+			long ind, k;
+			float pix0;
+			//run over all and then fix boundaries
+#pragma omp for
+			for (ind = 0; ind < nx * ny * (nz - 1); ind++)
+			{
+				pix0 = -inimage[ind];
+
+				outimageX[ind] = pix0 + inimage[ind + 1];
+				outimageY[ind] = pix0 + inimage[ind + nx];
+				outimageZ[ind] = pix0 + inimage[ind + nx * ny];
+			}
+
+#pragma omp for
+			for (ind = 0; ind < nx * (ny - 1); ind++)
+			{
+				pix0 = -inimage[ind + offset1];
+
+				outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1];
+				outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx];
+			}
+
+#pragma omp for
+			for (ind = 0; ind < nx - 1; ind++)
+			{
+				pix0 = -inimage[ind + offset2];
+
+				outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
+			}
+
+			//boundaries
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int i = 0; i < nx; i++)
+				{
+					outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0;
+				}
+			}
+
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int j = 0; j < ny; j++)
+				{
+					outimageX[k * ny * nx + j * nx + nx - 1] = 0;
+				}
+			}
+
+			if (z_dim)
+			{
+#pragma omp for
+				for (ind = 0; ind < ny * nx; ind++)
+				{
+					outimageZ[nx * ny * (nz - 1) + ind] = 0;
+				}
+
+#pragma omp for
+				for (ind = 0; ind < volume; ind++)
+				{
+					outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind] + outimageZ[ind] * outimageZ[ind];
+				}
+
+			}
+			else
+			{
+
+#pragma omp for
+				for (ind = 0; ind < volume; ind++)
+				{
+					outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind];
+				}
+			}
+		}
+
+	
+		inimage += volume;
+		outimageX += volume;
+		outimageY += volume;
+		outimageZ += volume;
+		outimageL21norm += volume;
+	}
+
+
+	//now the rest of the channels
+	if (nc > 1)
+	{
+		long ind;
+
+		for (c = 0; c < nc - 1; c++)
+		{
+			float * outimageC = outimageCfull + c * volume;
+			float * outimageL21norm = outimageL21normfull + c * volume;
+			const float * inimage = inimagefull + c * volume;
+
+#pragma omp parallel for
+			for (ind = 0; ind < volume; ind++)
+			{
+				outimageC[ind] = -inimage[ind] + inimage[ind + volume];
+				outimageL21norm[ind] += outimageC[ind] * outimageC[ind];
+
+				//sqrt
+				outimageL21norm[ind] = (float)sqrt(outimageL21norm[ind]);
+			}
+		}
+
+#pragma omp parallel for
+		for (ind = 0; ind < volume; ind++)
+		{
+			outimageCfull[(nc - 1) * volume + ind] = 0;
+		}
+	}
+
+	return 0;
+}
+int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+{
+	size_t volume = nx * ny * nz;
+
+	const float * inimage = inimagefull;
+	float * outimageX = outimageXfull;
+	float * outimageY = outimageYfull;
+	float * outimageZ = outimageZfull;
+
+	int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+	int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
+
+
+	long c;
+	for (c = 0; c < nc; c++)
+	{
+
+#pragma omp parallel
+		{
+			long ind, k;
+			float pix0;
+			//run over all and then fix boundaries
+#pragma omp for
+			for (ind = 0; ind < nx * ny * (nz - 1); ind++)
+			{
+				pix0 = -inimage[ind];
+
+				outimageX[ind] = pix0 + inimage[ind + 1];
+				outimageY[ind] = pix0 + inimage[ind + nx];
+				outimageZ[ind] = pix0 + inimage[ind + nx * ny];
+			}
+
+#pragma omp for
+			for (ind = 0; ind < nx * (ny - 1); ind++)
+			{
+				pix0 = -inimage[ind + offset1];
+
+				outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1];
+				outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx];
+			}
+
+#pragma omp for
+			for (ind = 0; ind < nx - 1; ind++)
+			{
+				pix0 = -inimage[ind + offset2];
+
+				outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
+			}
+
+			//boundaries
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int i = 0; i < nx; i++)
+				{
+					int ind1 = (k * ny * nx);
+					int ind2 = ind1 + (ny - 1) * nx;
+
+					outimageY[ind2 + i] = -inimage[ind2 + i] + inimage[ind1 + i];
+				}
+			}
+
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int j = 0; j < ny; j++)
+				{
+					int ind1 = k * ny * nx + j * nx;
+					int ind2 = ind1 + nx - 1;
+
+					outimageX[ind2] = -inimage[ind2] + inimage[ind1];
+				}
+			}
+
+
+			if (nz > 1)
+			{
+#pragma omp for
+				for (ind = 0; ind < ny * nx; ind++)
+				{
+					outimageZ[nx * ny * (nz - 1) + ind] = -inimage[nx * ny * (nz - 1) + ind] + inimage[ind];
+				}
+			}
+		}
+
+		inimage += volume;
+		outimageX += volume;
+		outimageY += volume;
+		outimageZ += volume;
+	}
+
+	//now the rest of the channels
+	if (nc > 1)
+	{
+		long ind;
+
+		for (c = 0; c < nc - 1; c++)
+		{
+			float * outimageC = outimageCfull + c * volume;
+			const float * inimage = inimagefull + c * volume;
+
+#pragma omp parallel for
+			for (ind = 0; ind < volume; ind++)
+			{
+				outimageC[ind] = -inimage[ind] + inimage[ind + volume];
+			}
+		}
+
+#pragma omp parallel for
+		for (ind = 0; ind < volume; ind++)
+		{
+			outimageCfull[(nc - 1) * volume + ind] = -inimagefull[(nc - 1) * volume + ind] + inimagefull[ind];
+		}
+	}
+
+	return 0;
+}
+int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc)
+{
+	//runs over full data in x, y, z. then corrects elements for bounday conditions and sums
+	size_t volume = nx * ny * nz;
+
+	//assumes nx and ny > 1
+	int z_dim = nz - 1;
+
+	float * outimage = outimagefull;
+	const float * inimageX = inimageXfull;
+	const float * inimageY = inimageYfull;
+	const float * inimageZ = inimageZfull;
+
+	float * tempX = (float*)malloc(volume * sizeof(float));
+	float * tempY = (float*)malloc(volume * sizeof(float));
+	float * tempZ;
+	
+	if(z_dim)
+	{
+		tempZ = (float*)malloc(volume * sizeof(float));
+	}
+
+	long c;
+	for (c = 0; c < nc; c++) //just calculating x, y and z in each channel here
+	{
+#pragma omp parallel
+		{
+			long ind, k;
+
+#pragma omp for
+			for (ind = 1; ind < nx * ny * nz; ind++)
+			{
+				tempX[ind] = -inimageX[ind] + inimageX[ind - 1];
+			}
+#pragma omp for
+			for (ind = nx; ind < nx * ny * nz; ind++)
+			{
+				tempY[ind] = -inimageY[ind] + inimageY[ind - nx];
+
+			}
+
+			//boundaries
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int j = 0; j < ny; j++)
+				{
+					tempX[k * ny * nx + j * nx] = -inimageX[k * ny * nx + j * nx];
+					tempX[k * ny * nx + j * nx + nx - 1] = inimageX[k * ny * nx + j * nx + nx - 2];
+				}
+			}
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int i = 0; i < nx; i++)
+				{
+					tempY[(k * ny * nx) + i] = -inimageY[(k * ny * nx) + i];
+					tempY[(k * ny * nx) + nx * (ny - 1) + i] = inimageY[(k * ny * nx) + nx * (ny - 2) + i];
+				}
+			}
+
+			if (z_dim)
+			{
+#pragma omp for
+				for (ind = nx * ny; ind < nx * ny * nz; ind++)
+				{
+					tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny];
+				}				
+#pragma omp for
+				for (ind = 0; ind < ny * nx; ind++)
+				{
+					tempZ[ind] = -inimageZ[ind];
+					tempZ[nx * ny * (nz - 1) + ind] = inimageZ[nx * ny * (nz - 2) + ind];
+				}
+#pragma omp for
+				for (ind = 0; ind < volume; ind++)
+				{
+					outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind];
+				}
+
+			}
+			else
+			{
+#pragma omp for
+				for (ind = 0; ind < volume; ind++)
+				{
+					outimage[ind] = tempX[ind] + tempY[ind];
+				}
+			}
+
+		}
+
+		outimage += volume;
+		inimageX += volume;
+		inimageY += volume;
+		inimageZ += volume;
+	}
+	free(tempX);
+	free(tempY);
+
+	if(z_dim)
+		free(tempZ);
+
+
+	//	//now the rest of the channels
+	if (nc > 1)
+	{
+		long ind;
+
+		for (c = 1; c < nc - 1; c++)
+		{
+
+#pragma omp parallel for
+			for (ind = 0; ind < volume; ind++)
+			{
+				outimagefull[ind + c * volume] += -inimageCfull[ind + c * volume] + inimageCfull[ind + (c - 1) * volume];
+			}
+		}
+
+#pragma omp parallel for
+		for (ind = 0; ind < volume; ind++)
+		{
+			outimagefull[ind] += -inimageCfull[ind];
+			outimagefull[(nc - 1) * volume + ind] += inimageCfull[(nc - 2) * volume + ind];
+		}
+
+	}
+
+	return 0;
+}
+int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc)
+{
+	//runs over full data in x, y, z. then correctects elements for bounday conditions and sums
+	size_t volume = nx * ny * nz;
+
+	//assumes nx and ny > 1
+	int z_dim = nz - 1;
+
+	float * outimage = outimagefull;
+	const float * inimageX = inimageXfull;
+	const float * inimageY = inimageYfull;
+	const float * inimageZ = inimageZfull;
+
+	float * tempX = (float*)malloc(volume * sizeof(float));
+	float * tempY = (float*)malloc(volume * sizeof(float));
+	float * tempZ;
+
+	if (z_dim)
+	{
+		tempZ = (float*)malloc(volume * sizeof(float));
+	}
+
+	long c;
+	for (c = 0; c < nc; c++) //just calculating x, y and z in each channel here
+	{
+#pragma omp parallel
+		{
+			long ind, k;
+
+			//run over all and then fix boundaries
+#pragma omp for
+			for (ind = 1; ind < volume; ind++)
+			{
+				tempX[ind] = -inimageX[ind] + inimageX[ind - 1];
+			}
+#pragma omp for
+			for (ind = nx; ind < volume; ind++)
+			{
+				tempY[ind] = -inimageY[ind] + inimageY[ind - nx];
+			}
+
+			//boundaries
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int i = 0; i < nx; i++)
+				{
+					tempY[(k * ny * nx) + i] = -inimageY[(k * ny * nx) + i] + inimageY[(k * ny * nx) + nx * (ny - 1) + i];
+				}
+			}
+#pragma omp for
+			for (k = 0; k < nz; k++)
+			{
+				for (int j = 0; j < ny; j++)
+				{
+					tempX[k * ny * nx + j * nx] = -inimageX[k * ny * nx + j * nx] + inimageX[k * ny * nx + j * nx + nx - 1];
+				}
+			}
+
+			if (z_dim)
+			{
+
+#pragma omp for
+				for (ind = nx * ny; ind < nx * ny * nz; ind++)
+				{
+					tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny];
+				}
+#pragma omp for
+				for (ind = 0; ind < ny * nx; ind++)
+				{
+					tempZ[ind] = -inimageZ[ind] + inimageZ[nx * ny * (nz - 1) + ind];
+				}
+
+#pragma omp for
+				for (ind = 0; ind < volume; ind++)
+				{
+					outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind];
+				}
+
+			}
+			else
+			{
+#pragma omp for
+				for (ind = 0; ind < volume; ind++)
+				{
+					outimage[ind] = tempX[ind] + tempY[ind];
+				}
+			}
+
+		}
+
+		outimage += volume;
+		inimageX += volume;
+		inimageY += volume;
+		inimageZ += volume;
+	}
+	free(tempX);
+	free(tempY);
+
+	if(z_dim)
+		free(tempZ);
+
+	//now the rest of the channels
+	if (nc > 1)
+	{
+		long ind;
+
+		for (c = 1; c < nc; c++)
+		{
+
+#pragma omp parallel for
+			for (ind = 0; ind < volume; ind++)
+			{
+				outimagefull[ind + c * volume] += -inimageCfull[ind + c * volume] + inimageCfull[ind + (c - 1) * volume];
+			}
+		}
+
+#pragma omp parallel for
+		for (ind = 0; ind < volume; ind++)
+		{
+			outimagefull[ind] += -inimageCfull[ind] + inimageCfull[(nc - 1) * volume + ind];
+		}
+	}
+
+	return 0;
+}
+
+
+DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction)
+{
+	if (boundary)
+	{
+		if (direction)
+			fdiff_direct_periodic(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
+		else
+			fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
+	}
+	else
+		{
+		if (direction)
+			fdiff_direct_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
+		else
+			fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
+	}
+	
+	return 0;
+}
+DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction)
+{
+	if (boundary)
+	{
+		if (direction)
+			fdiff_direct_periodic(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1);
+		else
+			fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1);
+	}
+	else
+	{
+		if (direction)
+			fdiff_direct_neumann(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1);
+		else
+			fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1);
+	}
+
+	return 0;
+}
+DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction)
+{
+	if (boundary)
+	{
+		if (direction)
+			fdiff_direct_periodic(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1);
+		else
+			fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1);
+	}
+	else
+	{
+		if (direction)
+			fdiff_direct_neumann(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1);
+		else
+			fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1);
+	}
+
+	return 0;
+}
diff --git a/src/Core/axpby.c b/src/Core/axpby.c
new file mode 100755
index 0000000..c4d162d
--- /dev/null
+++ b/src/Core/axpby.c
@@ -0,0 +1,109 @@
+#include "axpby.h"
+
+
+DLL_EXPORT int padd(float * x, float * y, float * out, long size){
+    long i = 0;
+#pragma omp parallel for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = *(x + i) + *(y+i); 
+    }
+    return 0;
+}
+
+DLL_EXPORT int psubtract(float * x, float * y, float * out, long size){
+    long i = 0;
+#pragma omp parallel
+{
+//#pragma omp single
+//{
+//		printf("current number of threads %d\n", omp_get_num_threads());
+//}
+#pragma omp for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = *(x + i) - *(y+i); 
+    }
+}
+    return 0;
+    
+}
+
+DLL_EXPORT int pmultiply(float * x, float * y, float * out, long size){
+    long i = 0;
+#pragma omp parallel for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = *(x + i) * *(y+i); 
+    }
+    return 0;
+}
+
+DLL_EXPORT int pdivide(float * x, float * y, float * out, long size, float default_value)
+{
+    long i = 0;
+#pragma omp parallel for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = *(y+i) ? *(x + i) / *(y+i) : default_value;
+    }
+    return 0;
+}
+DLL_EXPORT int ppower(float * x, float * y, float * out, long size){
+    long i = 0;
+#pragma omp parallel for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = (float)pow(*(x + i) , *(y+i)) ; 
+    }
+    return 0;
+}
+
+DLL_EXPORT int pminimum(float * x, float * y, float * out, long size){
+    long i = 0;
+#pragma omp parallel for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = *(y+i) > (*x+i) ? *(x + i) :  *(y+i); 
+    }
+    return 0;
+}
+
+DLL_EXPORT int pmaximum(float * x, float * y, float * out, long size) {
+	long i = 0;
+#pragma omp parallel for
+	for (i = 0; i < size; i++)
+	{
+		*(out + i) = *(y + i) < (*x + i) ? *(x + i) : *(y + i);
+	}
+	return 0;
+}
+
+
+DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size){
+    long i = 0;
+#pragma omp parallel
+{
+#pragma omp for
+    for (i=0; i < size; i++)
+    {
+        *(out + i ) = a * ( *(x + i) ) + b * ( *(y + i) ); 
+    }
+}
+    return 0;
+    
+}
+
+DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size) {
+	long i = 0;
+#pragma omp parallel
+	{
+#pragma omp for
+		for (i = 0; i < size; i++)
+		{
+			*(out + i) = a * (*(x + i)) + b * (*(y + i));
+		}
+	}
+	return 0;
+
+}
\ No newline at end of file
diff --git a/src/Core/include/FiniteDifferenceLibrary.h b/src/Core/include/FiniteDifferenceLibrary.h
new file mode 100644
index 0000000..6e426af
--- /dev/null
+++ b/src/Core/include/FiniteDifferenceLibrary.h
@@ -0,0 +1,6 @@
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "omp.h"
+//#include "ipp.h"
+#include "dll_export.h"
\ No newline at end of file
diff --git a/src/Core/include/axpby.h b/src/Core/include/axpby.h
new file mode 100644
index 0000000..2849547
--- /dev/null
+++ b/src/Core/include/axpby.h
@@ -0,0 +1,17 @@
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "omp.h"
+#include "dll_export.h"
+
+
+DLL_EXPORT int padd(float * x, float * y, float * out, long size);
+DLL_EXPORT int psubtract(float * x, float * y, float * out, long size);
+DLL_EXPORT int pmultiply(float * x, float * y, float * out, long size);
+DLL_EXPORT int pdivide(float * x, float * y, float * out, long size, float default_value);
+DLL_EXPORT int ppower(float * x, float * y, float * out, long size);
+DLL_EXPORT int pminimum(float * x, float * y, float * out, long size);
+DLL_EXPORT int pmaximum(float * x, float * y, float * out, long size);
+
+DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size);
+DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size);
diff --git a/src/Core/include/dll_export.h b/src/Core/include/dll_export.h
new file mode 100755
index 0000000..6b816c3
--- /dev/null
+++ b/src/Core/include/dll_export.h
@@ -0,0 +1,20 @@
+#pragma once
+#ifndef DLLEXPORT_H
+#define DLLEXPORT_H
+
+#if defined(_WIN32) || defined(__WIN32__)
+#if defined(dll_EXPORTS)  // add by CMake 
+#define  DLL_EXPORT __declspec(dllexport)
+#define EXPIMP_TEMPLATE
+#else
+#define  DLL_EXPORT __declspec(dllimport)
+#define EXPIMP_TEMPLATE extern
+#endif 
+#elif defined(linux) || defined(__linux) || defined(__APPLE__)
+#define DLL_EXPORT
+#ifndef __cdecl
+#define __cdecl
+#endif
+#endif
+
+#endif
-- 
cgit v1.2.3