From c20e4b3fe683fa27ab3dcba03a3f5cdfb9b8db0f Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <wjp@usecode.org>
Date: Fri, 22 Jan 2021 11:48:32 +0100
Subject: Fix supersampling version of cone_bp and add test

---
 cuda/3d/cone_bp.cu               |  8 ++++----
 tests/python/test_rec_scaling.py | 27 ++++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index df1e19f..3525eb4 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -213,15 +213,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
 			float fZs = fZ;
 			for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {
 
-				const float fUNum = fCu.w + fX * fCu.x + fY * fCu.y + fZ * fCu.z;
-				const float fVNum = fCv.w + fX * fCv.x + fY * fCv.y + fZ * fCv.z;
-				const float fDen  = fCd.w + fX * fCd.x + fY * fCd.y + fZ * fCd.z;
+				const float fUNum = fCu.w + fXs * fCu.x + fYs * fCu.y + fZs * fCu.z;
+				const float fVNum = fCv.w + fXs * fCv.x + fYs * fCv.y + fZs * fCv.z;
+				const float fDen  = fCd.w + fXs * fCd.x + fYs * fCd.y + fZs * fCd.z;
 
 				const float fr = __fdividef(1.0f, fDen);
 				const float fU = fUNum * fr;
 				const float fV = fVNum * fr;
 
-				fVal += tex3D(gT_coneProjTexture, fU, fV, fAngle) * fr;
+				fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV) * fr * fr;
 
 				fZs += fSubStep;
 			}
diff --git a/tests/python/test_rec_scaling.py b/tests/python/test_rec_scaling.py
index 621fd8a..ee27efc 100644
--- a/tests/python/test_rec_scaling.py
+++ b/tests/python/test_rec_scaling.py
@@ -76,7 +76,7 @@ def ProjectionGeometries(type):
 
 
 class TestRecScale(unittest.TestCase):
-  def single_test(self, geom_type, proj_type, alg, iters):
+  def single_test(self, geom_type, proj_type, alg, iters, vss, dss):
     if alg == 'FBP' and 'fanflat' in geom_type:
       self.skipTest('CPU FBP is parallel-beam only')
     is3D = (geom_type in ['parallel3d', 'cone'])
@@ -88,7 +88,12 @@ class TestRecScale(unittest.TestCase):
         else:
           vol = np.zeros((64,64,64),dtype=np.float32)
           vol[25:35,25:35,25:35] = 1
-        proj_id = astra.create_projector(proj_type, pg, vg)
+        options = {}
+        if vss > 1:
+          options["VoxelSuperSampling"] = vss
+        if dss > 1:
+          options["DetectorSuperSampling"] = vss
+        proj_id = astra.create_projector(proj_type, pg, vg, options=options)
         if not is3D:
           sino_id, sinogram = astra.create_sino(vol, proj_id)
         else:
@@ -185,6 +190,14 @@ __algs_cone = {
 }
 
 
+__combinations_ss = {
+  'parallel': [ { 'projector': 'cuda', 'alg': 'SIRT_CUDA', 'iters': 50 } ],
+  'fanflat': [ { 'projector': 'cuda', 'alg': 'SIRT_CUDA', 'iters': 50 } ],
+  'parallel3d': [ { 'projector': 'cuda3d', 'alg': 'SIRT3D_CUDA', 'iters': 200 } ],
+  'cone': [ { 'projector': 'cuda3d', 'alg': 'SIRT3D_CUDA', 'iters': 200 } ]
+}
+
+
 
 for k, l in __combinations.items():
   for v in l:
@@ -199,7 +212,7 @@ for k, l in __combinations.items():
       A = __algs
     for a, i in A.items():
       def f(k, v, a, i):
-        return lambda self: self.single_test(k, v, a, i)
+        return lambda self: self.single_test(k, v, a, i, 1, 1)
       setattr(TestRecScale, 'test_' + a + '_' + k + '_' + v, f(k,v,a,i))
 
 for k, l in __combinations_adjoint.items():
@@ -208,6 +221,14 @@ for k, l in __combinations_adjoint.items():
       return lambda self: self.single_test_adjoint3D(k, v)
     setattr(TestRecScale, 'test_adjoint_' + k + '_' + v, g(k,v))
 
+for k, l in __combinations_ss.items():
+  for A in l:
+    for vss in [1, 2]:
+      for dss in [1, 2]:
+        def h(k, v, a, i, vss, dss):
+          return lambda self: self.single_test(k, v, a, i, vss, dss)
+        setattr(TestRecScale, 'test_ss_' + a + '_' + k + '_' + v + '_' + str(vss) + '_' + str(dss), h(k, A['projector'], A['alg'], A['iters'], vss, dss))
+
 if __name__ == '__main__':
   unittest.main()
 
-- 
cgit v1.2.3