Dataprocessor opt (#216)

* initial implementation of Algorithm and Gradient Descent
* added example on gradient descent
* refined definition of Algorithm and Gradient Descent
* allow memory efficient code in dataprocessor
* revert to master version
* added test for DataProcessor
author: Edoardo Pasca <edo.paskino@gmail.com> 2019-03-05 16:14:08 +0000
committer: GitHub <noreply@github.com> 2019-03-05 16:14:08 +0000
commit: 03ad730071bb772c3cc9c65ebb1b8f5c0136e391 (patch)
tree: dbdbcf03fdce9135b293a99be261aefbed9aaf8b /Wrappers
parent: 8b218f9fd8ef283c5d1dde0e7268301de64f47d4 (diff)
download: framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.tar.gz
framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.tar.bz2
framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.tar.xz
framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.zip
4 files changed, 446 insertions, 16 deletions
diff --git a/Wrappers/Python/ccpi/framework.py b/Wrappers/Python/ccpi/framework.py
index 69a17dc..24f4ca6 100644
--- a/Wrappers/Python/ccpi/framework.py
+++ b/Wrappers/Python/ccpi/framework.py
@@ -1009,9 +1009,9 @@ class DataProcessor(object):
         '''
         raise NotImplementedError('Implement basic checks for input DataContainer')
         
-    def get_output(self):
+    def get_output(self, out=None):
         for k,v in self.__dict__.items():
-            if v is None:
+            if v is None and k != 'output':
                 raise ValueError('Key {0} is None'.format(k))
         shouldRun = False
         if self.runTime == -1:
@@ -1022,10 +1022,18 @@ class DataProcessor(object):
         # CHECK this
         if self.store_output and shouldRun:
             self.runTime = datetime.now()
-            self.output = self.process()
-            return self.output
+            try:
+                self.output = self.process(out=out)
+                return self.output
+            except TypeError as te:
+                self.output = self.process()
+                return self.output
         self.runTime = datetime.now()
-        return self.process()
+        try:
+            return self.process(out=out)
+        except TypeError as te:
+            return self.process()
+            
     
     def set_input_processor(self, processor):
         if issubclass(type(processor), DataProcessor):
@@ -1046,7 +1054,7 @@ class DataProcessor(object):
             dsi = self.input
         return dsi
         
-    def process(self):
+    def process(self, out=None):
         raise NotImplementedError('process must be implemented')
         
     
@@ -1093,16 +1101,57 @@ class AX(DataProcessor):
     def check_input(self, dataset):
         return True
         
-    def process(self):
+    def process(self, out=None):
         
         dsi = self.get_input()
         a = self.scalar
+        if out is None:
+            y = DataContainer( a * dsi.as_array() , True, 
+                        dimension_labels=dsi.dimension_labels )
+            #self.setParameter(output_dataset=y)
+            return y
+        else:
+            out.fill(a * dsi.as_array())
+    
+
+###### Example of DataProcessors
+
+class CastDataContainer(DataProcessor):
+    '''Example DataProcessor
+    Cast a DataContainer array to a different type.
+
+    y := numpy.asarray(x, dtype=dtype)
+    where:
+
+    dtype is the target numpy data type
+
+    x is a DataContainer.
+    '''
+    
+    def __init__(self, dtype=None):
+        kwargs = {'dtype':dtype, 
+                  'input':None, 
+                  }
         
-        y = DataContainer( a * dsi.as_array() , True, 
-                    dimension_labels=dsi.dimension_labels )
-        #self.setParameter(output_dataset=y)
-        return y
+        #DataProcessor.__init__(self, **kwargs)
+        super(CastDataContainer, self).__init__(**kwargs)
     
+    def check_input(self, dataset):
+        return True
+        
+    def process(self, out=None):
+        
+        dsi = self.get_input()
+        dtype = self.dtype
+        if out is None:
+            y = numpy.asarray(dsi.as_array(), dtype=dtype)
+            
+            return type(dsi)(numpy.asarray(dsi.as_array(), dtype=dtype),
+                                dimension_labels=dsi.dimension_labels )
+        else:
+            out.fill(numpy.asarray(dsi.as_array(), dtype=dtype))
+    
+        
         
     
     
@@ -1126,7 +1175,7 @@ class PixelByPixelDataProcessor(DataProcessor):
     def check_input(self, dataset):
         return True
     
-    def process(self):
+    def process(self, out=None):
         
         pyfunc = self.pyfunc
         dsi = self.get_input()
@@ -1185,12 +1234,22 @@ if __name__ == '__main__':
     #ax.apply()
     print ("ax  in {0} out {1}".format(c.as_array().flatten(),
            ax.get_output().as_array().flatten()))
+    
+    cast = CastDataContainer(dtype=numpy.float32)
+    cast.set_input(c)
+    out = cast.get_output()
+    out *= 0 
     axm = AX()
     axm.scalar = 0.5
-    axm.set_input(c)
+    axm.set_input_processor(cast)
+    axm.get_output(out)
     #axm.apply()
     print ("axm in {0} out {1}".format(c.as_array(), axm.get_output().as_array()))
     
+    # check out in DataSetProcessor
+   #a = numpy.asarray([i for i in range( size )])
+    
+        
     # create a PixelByPixelDataProcessor
     
     #define a python function which will take only one input (the pixel value)
diff --git a/Wrappers/Python/ccpi/processors.py b/Wrappers/Python/ccpi/processors.py
index 6a9057a..3a3671a 100755
--- a/Wrappers/Python/ccpi/processors.py
+++ b/Wrappers/Python/ccpi/processors.py
@@ -102,7 +102,7 @@ class Normalizer(DataProcessor):
         rel_norm_error = (b + a) / (b * a) * (df + dd)
         return rel_norm_error
         
-    def process(self):
+    def process(self, out=None):
         
         projections = self.get_input()
         dark = self.dark_field
@@ -400,7 +400,7 @@ class CenterOfRotationFinder(DataProcessor):
         mask[:,centercol-1:centercol+2] = numpy.zeros((nrow, 3), dtype='float32')
         return mask
     
-    def process(self):
+    def process(self, out=None):
         
         projections = self.get_input()
         
@@ -442,7 +442,7 @@ class AcquisitionDataPadder(DataProcessor):
             raise ValueError("Expected input dimensions is 2 or 3, got {0}"\
                              .format(dataset.number_of_dimensions))
 
-    def process(self):
+    def process(self, out=None):
         projections = self.get_input()
         w = projections.get_dimension_size('horizontal')
         delta = w - 2 * self.center_of_rotation
diff --git a/Wrappers/Python/test/test_DataProcessor.py b/Wrappers/Python/test/test_DataProcessor.py
new file mode 100755
index 0000000..1c1de3a
--- /dev/null
+++ b/Wrappers/Python/test/test_DataProcessor.py
@@ -0,0 +1,76 @@
+import sys
+import unittest
+import numpy
+from ccpi.framework import DataProcessor
+from ccpi.framework import DataContainer
+from ccpi.framework import ImageData
+from ccpi.framework import AcquisitionData
+from ccpi.framework import ImageGeometry
+from ccpi.framework import AcquisitionGeometry
+from timeit import default_timer as timer
+
+from ccpi.framework import AX, CastDataContainer, PixelByPixelDataProcessor
+
+class TestDataProcessor(unittest.TestCase):
+
+    def test_DataProcessorChaining(self):
+        shape = (2,3,4,5)
+        size = shape[0]
+        for i in range(1, len(shape)):
+            size = size * shape[i]
+        #print("a refcount " , sys.getrefcount(a))
+        a = numpy.asarray([i for i in range( size )])
+        a = numpy.reshape(a, shape)
+        ds = DataContainer(a, False, ['X', 'Y','Z' ,'W'])
+        c = ds.subset(['Z','W','X'])
+        arr = c.as_array()
+        #[ 0 60  1 61  2 62  3 63  4 64  5 65  6 66  7 67  8 68  9 69 10 70 11 71
+        # 12 72 13 73 14 74 15 75 16 76 17 77 18 78 19 79]
+    
+        ax = AX()
+        ax.scalar = 2
+        ax.set_input(c)
+        #ax.apply()
+        print ("ax  in {0} out {1}".format(c.as_array().flatten(),
+               ax.get_output().as_array().flatten()))
+        numpy.testing.assert_array_equal(ax.get_output().as_array(), arr*2)
+        
+        cast = CastDataContainer(dtype=numpy.float32)
+        cast.set_input(c)
+        out = cast.get_output()
+        self.assertTrue(out.as_array().dtype == numpy.float32)
+        out *= 0 
+        axm = AX()
+        axm.scalar = 0.5
+        axm.set_input(c)
+        axm.get_output(out)
+        numpy.testing.assert_array_equal(out.as_array(), arr*0.5)
+        
+        # check out in DataSetProcessor
+        #a = numpy.asarray([i for i in range( size )])
+           
+        # create a PixelByPixelDataProcessor
+        
+        #define a python function which will take only one input (the pixel value)
+        pyfunc = lambda x: -x if x > 20 else x
+        clip = PixelByPixelDataProcessor()
+        clip.pyfunc = pyfunc 
+        clip.set_input(c)    
+        #clip.apply()
+        v = clip.get_output().as_array()
+        
+        self.assertTrue(v.max() == 19)
+        self.assertTrue(v.min() == -79)
+        
+        print ("clip in {0} out {1}".format(c.as_array(), clip.get_output().as_array()))
+        
+        #dsp = DataProcessor()
+        #dsp.set_input(ds)
+        #dsp.input = a
+        # pipeline
+    
+        chain = AX()
+        chain.scalar = 0.5
+        chain.set_input_processor(ax)
+        print ("chain in {0} out {1}".format(ax.get_output().as_array(), chain.get_output().as_array()))
+        numpy.testing.assert_array_equal(chain.get_output().as_array(), arr)
+\ No newline at end of file
diff --git a/Wrappers/Python/wip/demo_gradient_descent.py b/Wrappers/Python/wip/demo_gradient_descent.py
new file mode 100755
index 0000000..4d6647e
--- /dev/null
+++ b/Wrappers/Python/wip/demo_gradient_descent.py
@@ -0,0 +1,295 @@
+
+from ccpi.framework import ImageData, ImageGeometry, AcquisitionGeometry, DataContainer
+from ccpi.optimisation.algs import FISTA, FBPD, CGLS
+from ccpi.optimisation.funcs import Norm2sq, ZeroFun, Norm1, TV2D, Norm2
+
+from ccpi.optimisation.ops import LinearOperatorMatrix, TomoIdentity
+from ccpi.optimisation.ops import Identity
+from ccpi.optimisation.ops import FiniteDiff2D
+
+# Requires CVXPY, see http://www.cvxpy.org/
+# CVXPY can be installed in anaconda using
+# conda install -c cvxgrp cvxpy libgcc
+
+# Whether to use or omit CVXPY (no use_cvxpy flag is set here; the CVXPY comparison code below is commented out)
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+class Algorithm(object):
+    def __init__(self, *args, **kwargs):
+        pass
+    def set_up(self, *args, **kwargs):
+        raise NotImplementedError()
+    def update(self):
+        raise NotImplementedError()
+    
+    def should_stop(self):
+        raise NotImplementedError()
+    
+    def __iter__(self):
+        return self
+    
+    def __next__(self):
+        if self.should_stop():
+            raise StopIteration()
+        else:
+            self.update()
+        
+class GradientDescent(Algorithm):
+    x = None
+    rate = 0
+    objective_function = None
+    regulariser = None
+    iteration = 0
+    stop_cryterion = 'max_iter'
+    __max_iteration = 0
+    __loss = []
+    def __init__(self, **kwargs):
+        args = ['x_init', 'objective_function', 'rate']
+        present = True
+        for k,v in kwargs.items():
+            if k in args:
+                args.pop(args.index(k))
+        if len(args) == 0:
+            return self.set_up(x_init=kwargs['x_init'],
+                               objective_function=kwargs['objective_function'],
+                               rate=kwargs['rate'])
+    
+    def should_stop(self):
+        return self.iteration >= self.max_iteration
+    
+    def set_up(self, x_init, objective_function, rate):
+        self.x = x_init.copy()
+        self.x_update = x_init.copy()
+        self.objective_function = objective_function
+        self.rate = rate
+        self.__loss.append(objective_function(x_init))
+        
+    def update(self):
+        
+        self.objective_function.gradient(self.x, out=self.x_update)
+        self.x_update *= -self.rate
+        self.x += self.x_update
+        self.__loss.append(self.objective_function(self.x))
+        self.iteration += 1
+        
+    def get_output(self):
+        return self.x
+    def get_current_loss(self):
+        return self.__loss[-1]
+    @property
+    def loss(self):
+        return self.__loss
+    @property
+    def max_iteration(self):
+        return self.__max_iteration
+    @max_iteration.setter
+    def max_iteration(self, value):
+        assert isinstance(value, int)
+        self.__max_iteration = value
+        
+    
+
+
+
+# Problem data.
+m = 30
+n = 20
+np.random.seed(1)
+Amat = np.random.randn(m, n)
+A = LinearOperatorMatrix(Amat)
+bmat = np.random.randn(m)
+bmat.shape = (bmat.shape[0],1)
+
+# A = Identity()
+# Change n to equal to m.
+
+b = DataContainer(bmat)
+
+# Regularization parameter
+lam = 10
+opt = {'memopt':True}
+# Create object instances with the test data A and b.
+f = Norm2sq(A,b,c=0.5, memopt=True)
+g0 = ZeroFun()
+
+# Initial guess
+x_init = DataContainer(np.zeros((n,1)))
+
+f.grad(x_init)
+
+# Run FISTA for least squares plus zero function.
+x_fista0, it0, timing0, criter0 = FISTA(x_init, f, g0 , opt=opt)
+
+# Print solution and final objective/criterion value for comparison
+print("FISTA least squares plus zero function solution and objective value:")
+print(x_fista0.array)
+print(criter0[-1])
+
+gd = GradientDescent(x_init=x_init, objective_function=f, rate=0.001)
+gd.max_iteration = 5000
+
+for i,el in enumerate(gd):
+    if i%100 == 0:
+        print ("\rIteration {} Loss: {}".format(gd.iteration, 
+               gd.get_current_loss()))
+
+
+#%%
+
+
+#
+#if use_cvxpy:
+#    # Compare to CVXPY
+#    
+#    # Construct the problem.
+#    x0 = Variable(n)
+#    objective0 = Minimize(0.5*sum_squares(Amat*x0 - bmat.T[0]) )
+#    prob0 = Problem(objective0)
+#    
+#    # The optimal objective is returned by prob.solve().
+#    result0 = prob0.solve(verbose=False,solver=SCS,eps=1e-9)
+#    
+#    # The optimal solution for x is stored in x.value and optimal objective value 
+#    # is in result as well as in objective.value
+#    print("CVXPY least squares plus zero function solution and objective value:")
+#    print(x0.value)
+#    print(objective0.value)
+#
+## Plot criterion curve to see FISTA converge to same value as CVX.
+#iternum = np.arange(1,1001)
+#plt.figure()
+#plt.loglog(iternum[[0,-1]],[objective0.value, objective0.value], label='CVX LS')
+#plt.loglog(iternum,criter0,label='FISTA LS')
+#plt.legend()
+#plt.show()
+#
+## Create 1-norm object instance
+#g1 = Norm1(lam)
+#
+#g1(x_init)
+#x_rand = DataContainer(np.reshape(np.random.rand(n),(n,1)))
+#x_rand2 = DataContainer(np.reshape(np.random.rand(n-1),(n-1,1)))
+#v = g1.prox(x_rand,0.02)
+##vv = g1.prox(x_rand2,0.02)
+#vv = v.copy() 
+#vv *= 0
+#print (">>>>>>>>>>vv" , vv.as_array())
+#vv.fill(v)
+#print (">>>>>>>>>>fill" , vv.as_array())
+#g1.proximal(x_rand, 0.02, out=vv)
+#print (">>>>>>>>>>v" , v.as_array())
+#print (">>>>>>>>>>gradient" , vv.as_array())
+#
+#print (">>>>>>>>>>" , (v-vv).as_array())
+#import sys
+##sys.exit(0)
+## Combine with least squares and solve using generic FISTA implementation
+#x_fista1, it1, timing1, criter1 = FISTA(x_init, f, g1,opt=opt)
+#
+## Print for comparison
+#print("FISTA least squares plus 1-norm solution and objective value:")
+#print(x_fista1)
+#print(criter1[-1])
+#
+#if use_cvxpy:
+#    # Compare to CVXPY
+#    
+#    # Construct the problem.
+#    x1 = Variable(n)
+#    objective1 = Minimize(0.5*sum_squares(Amat*x1 - bmat.T[0]) + lam*norm(x1,1) )
+#    prob1 = Problem(objective1)
+#    
+#    # The optimal objective is returned by prob.solve().
+#    result1 = prob1.solve(verbose=False,solver=SCS,eps=1e-9)
+#    
+#    # The optimal solution for x is stored in x.value and optimal objective value 
+#    # is in result as well as in objective.value
+#    print("CVXPY least squares plus 1-norm solution and objective value:")
+#    print(x1.value)
+#    print(objective1.value)
+#    
+## Now try another algorithm FBPD for same problem:
+#x_fbpd1, itfbpd1, timingfbpd1, criterfbpd1 = FBPD(x_init,Identity(), None, f, g1)
+#print(x_fbpd1)
+#print(criterfbpd1[-1])
+#
+## Plot criterion curve to see both FISTA and FBPD converge to same value.
+## Note that FISTA is very efficient for 1-norm minimization so it beats
+## FBPD in this test by a lot. But FBPD can handle a larger class of problems 
+## than FISTA can.
+#plt.figure()
+#plt.loglog(iternum[[0,-1]],[objective1.value, objective1.value], label='CVX LS+1')
+#plt.loglog(iternum,criter1,label='FISTA LS+1')
+#plt.legend()
+#plt.show()
+#
+#plt.figure()
+#plt.loglog(iternum[[0,-1]],[objective1.value, objective1.value], label='CVX LS+1')
+#plt.loglog(iternum,criter1,label='FISTA LS+1')
+#plt.loglog(iternum,criterfbpd1,label='FBPD LS+1')
+#plt.legend()
+#plt.show()
+
+# Now try 1-norm denoising with FISTA, followed by a plain gradient descent run.
+
+# Set up phantom size NxN by creating ImageGeometry, initialising the 
+# ImageData object with this geometry and empty array and finally put some
+# data into its array, and display as image.
+N = 64
+ig = ImageGeometry(voxel_num_x=N,voxel_num_y=N)
+Phantom = ImageData(geometry=ig)
+
+x = Phantom.as_array()
+x[round(N/4):round(3*N/4),round(N/4):round(3*N/4)] = 0.5
+x[round(N/8):round(7*N/8),round(3*N/8):round(5*N/8)] = 1
+
+plt.imshow(x)
+plt.title('Phantom image')
+plt.show()
+
+# Identity operator for denoising
+I = TomoIdentity(ig)
+
+# Data and add noise
+y = I.direct(Phantom)
+y.array = y.array + 0.1*np.random.randn(N, N)
+
+plt.imshow(y.array)
+plt.title('Noisy image')
+plt.show()
+
+
+###################
+# Data fidelity term
+f_denoise = Norm2sq(I,y,c=0.5,memopt=True)
+
+# 1-norm regulariser
+lam1_denoise = 1.0
+g1_denoise = Norm1(lam1_denoise)
+
+# Initial guess
+x_init_denoise = ImageData(np.zeros((N,N)))
+
+# Combine with least squares and solve using generic FISTA implementation
+x_fista1_denoise, it1_denoise, timing1_denoise, criter1_denoise = \
+   FISTA(x_init_denoise, f_denoise, g1_denoise, opt=opt)
+
+print(x_fista1_denoise)
+print(criter1_denoise[-1])
+
+f_2 = 
+gd = GradientDescent(x_init=x_init_denoise, 
+                     objective_function=f, rate=0.001)
+gd.max_iteration = 5000
+
+for i,el in enumerate(gd):
+    if i%100 == 0:
+        print ("\rIteration {} Loss: {}".format(gd.iteration, 
+               gd.get_current_loss()))
+
+plt.imshow(gd.get_output().as_array())
+plt.title('GD image')
+plt.show()
+
author	Edoardo Pasca <edo.paskino@gmail.com>	2019-03-05 16:14:08 +0000
committer	GitHub <noreply@github.com>	2019-03-05 16:14:08 +0000
commit	03ad730071bb772c3cc9c65ebb1b8f5c0136e391 (patch)
tree	dbdbcf03fdce9135b293a99be261aefbed9aaf8b /Wrappers
parent	8b218f9fd8ef283c5d1dde0e7268301de64f47d4 (diff)
download	framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.tar.gz framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.tar.bz2 framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.tar.xz framework-03ad730071bb772c3cc9c65ebb1b8f5c0136e391.zip