summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--cuda/2d/algo.cu12
-rw-r--r--cuda/2d/fan_bp.cu62
-rw-r--r--cuda/2d/par_bp.cu20
-rw-r--r--cuda/3d/cone_bp.cu6
-rw-r--r--src/CudaReconstructionAlgorithm2D.cpp3
-rw-r--r--tests/python/test_line2d.py59
6 files changed, 132 insertions, 30 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index b4c2864..11422ff 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -258,10 +258,10 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
if (!ok)
return false;
- // rescale sinogram to adjust for pixel size
- processSino<opMul>(D_sinoData, fSinogramScale,
- //1.0f/(fPixelSize*fPixelSize),
- sinoPitch, dims);
+ // rescale sinogram
+ if (fSinogramScale != 1.0f)
+ processSino<opMul>(D_sinoData, fSinogramScale,
+ sinoPitch, dims);
ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch,
dims,
@@ -331,11 +331,11 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
if (parProjs) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, parProjs, outputScale);
+ dims, parProjs, fOutputScale * outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs, outputScale);
+ dims, fanProjs, fOutputScale * outputScale);
}
}
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index dac3ac2..2072cd4 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -48,7 +48,7 @@ const unsigned int g_anglesPerBlock = 16;
const unsigned int g_blockSliceSize = 32;
const unsigned int g_blockSlices = 16;
-const unsigned int g_MaxAngles = 2560;
+const unsigned int g_MaxAngles = 2240;
__constant__ float gC_SrcX[g_MaxAngles];
__constant__ float gC_SrcY[g_MaxAngles];
@@ -56,6 +56,7 @@ __constant__ float gC_DetSX[g_MaxAngles];
__constant__ float gC_DetSY[g_MaxAngles];
__constant__ float gC_DetUX[g_MaxAngles];
__constant__ float gC_DetUY[g_MaxAngles];
+__constant__ float gC_Scale[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
@@ -96,8 +97,6 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
-
for (int angle = startAngle; angle < endAngle; ++angle)
{
const float fSrcX = gC_SrcX[angle];
@@ -106,15 +105,24 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
const float fDetSY = gC_DetSY[angle];
const float fDetUX = gC_DetUX[angle];
const float fDetUY = gC_DetUY[angle];
+ const float fScale = gC_Scale[angle];
const float fXD = fSrcX - fX;
const float fYD = fSrcY - fY;
const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA);
+
+ // fDen = || u (x-s) || (2x2 determinant)
+
+ // Scale factor is the approximate number of rays traversing this pixel,
+ // given by the inverse size of a detector pixel scaled by the magnification
+ // factor of this pixel.
+ // Magnification factor is || u (d-s) || / || u (x-s) ||
+
+ const float fr = __fdividef(1.0f, fDen);
+ const float fT = fNum * fr;
+ fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;
fA += 1.0f;
}
@@ -148,8 +156,6 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
-
for (int angle = startAngle; angle < endAngle; ++angle)
{
const float fSrcX = gC_SrcX[angle];
@@ -158,6 +164,7 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
const float fDetSY = gC_DetSY[angle];
const float fDetUX = gC_DetUX[angle];
const float fDetUY = gC_DetUY[angle];
+ const float fScale = gC_Scale[angle];
// TODO: Optimize these loops...
float fX = fXb;
@@ -169,9 +176,10 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA);
+ const float fr = __fdividef(1.0f, fDen);
+
+ const float fT = fNum * fr;
+ fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;
fY -= fSubStep;
}
fX += fSubStep;
@@ -202,8 +210,6 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
float* volData = (float*)D_volData;
- // TODO: Distance correction?
-
// TODO: Constant memory vs parameters.
const float fSrcX = gC_SrcX[0];
const float fSrcY = gC_SrcY[0];
@@ -211,15 +217,17 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
const float fDetSY = gC_DetSY[0];
const float fDetUX = gC_DetUX[0];
const float fDetUY = gC_DetUY[0];
+ const float fScale = gC_Scale[0];
const float fXD = fSrcX - fX;
const float fYD = fSrcY - fY;
const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
+ const float fr = __fdividef(1.0f, fDen);
+
+ const float fT = fNum * fr;
+ const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f) * fScale * fr;
volData[Y*volPitch+X] += fVal * fOutputScale;
}
@@ -248,7 +256,7 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
+ // TODO: Update for new projection scaling
for (int angle = startAngle; angle < endAngle; ++angle)
{
@@ -299,6 +307,13 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
#undef TRANSFER_TO_CONSTANT
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY);
+ double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize;
+ tmp[i] = (float)scale;
+ }
+ cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
delete[] tmp;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
@@ -346,6 +361,14 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
#undef TRANSFER_TO_CONSTANT
+ for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+ double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY);
+ double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize;
+ tmp[i] = (float)scale;
+ }
+ cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
+
delete[] tmp;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
@@ -389,6 +412,11 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
#undef TRANSFER_TO_CONSTANT
+ double detsize = sqrt(angles[angle].fDetUX * angles[angle].fDetUX + angles[angle].fDetUY * angles[angle].fDetUY);
+ double scale = (angles[angle].fDetUX * (angles[angle].fSrcY - angles[angle].fDetSY) - angles[angle].fDetUY * (angles[angle].fSrcX - angles[angle].fDetSX)) / detsize;
+ float tmp = (float)scale;
+ cudaMemcpyToSymbol(gC_Scale, &tmp, sizeof(float), 0, cudaMemcpyHostToDevice);
+
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 09a6554..21484b1 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -53,6 +53,7 @@ const unsigned int g_MaxAngles = 2560;
__constant__ float gC_angle_scaled_sin[g_MaxAngles];
__constant__ float gC_angle_scaled_cos[g_MaxAngles];
__constant__ float gC_angle_offset[g_MaxAngles];
+__constant__ float gC_angle_scale[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
{
@@ -70,6 +71,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
+// TODO: Templated version with/without scale? (Or only the global outputscale)
__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
@@ -97,9 +99,10 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
const float scaled_cos_theta = gC_angle_scaled_cos[angle];
const float scaled_sin_theta = gC_angle_scaled_sin[angle];
const float TOffset = gC_angle_offset[angle];
+ const float scale = gC_angle_scale[angle];
const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset;
- fVal += tex2D(gT_projTexture, fT, fA);
+ fVal += tex2D(gT_projTexture, fT, fA) * scale;
fA += 1.0f;
}
@@ -138,6 +141,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
const float cos_theta = gC_angle_scaled_cos[angle];
const float sin_theta = gC_angle_scaled_sin[angle];
const float TOffset = gC_angle_offset[angle];
+ const float scale = gC_angle_scale[angle];
float fT = fX * cos_theta - fY * sin_theta + TOffset;
@@ -145,7 +149,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
float fTy = fT;
fT += fSubStep * cos_theta;
for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
- fVal += tex2D(gT_projTexture, fTy, fA);
+ fVal += tex2D(gT_projTexture, fTy, fA) * scale;
fTy -= fSubStep * sin_theta;
}
}
@@ -172,6 +176,8 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
const float fT = fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
+ // NB: The 'scale' constant in devBP has been folded into fOutputScale by the caller here
+
D_volData[Y*volPitch+X] += fVal * fOutputScale;
}
@@ -186,27 +192,34 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* angle_scaled_sin = new float[dims.iProjAngles];
float* angle_scaled_cos = new float[dims.iProjAngles];
float* angle_offset = new float[dims.iProjAngles];
+ float* angle_scale = new float[dims.iProjAngles];
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;
angle_scaled_cos[i] = angles[i].fRayY / d;
- angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs
+ angle_scaled_sin[i] = -angles[i].fRayX / d;
angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;
+ angle_scale[i] = sqrt(angles[i].fRayX * angles[i].fRayX + angles[i].fRayY * angles[i].fRayY) / abs(d);
}
+ //fprintf(stderr, "outputscale in BP_internal: %f, %f\n", fOutputScale, angle_scale[0]);
+ //fprintf(stderr, "ray in BP_internal: %f,%f (length %f)\n", angles[0].fRayX, angles[0].fRayY, sqrt(angles[0].fRayX * angles[0].fRayX + angles[0].fRayY * angles[0].fRayY));
cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaError_t e4 = cudaMemcpyToSymbol(gC_angle_scale, angle_scale, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
assert(e1 == cudaSuccess);
assert(e2 == cudaSuccess);
assert(e3 == cudaSuccess);
+ assert(e4 == cudaSuccess);
delete[] angle_scaled_sin;
delete[] angle_scaled_cos;
delete[] angle_offset;
+ delete[] angle_scale;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -267,6 +280,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float angle_scaled_cos = angles[angle].fRayY / d;
float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs
float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;
+ fOutputScale *= sqrt(angles[angle].fRayX * angles[angle].fRayX + angles[angle].fRayY * angles[angle].fRayY) / abs(d);
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index feebda2..2a7ec80 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -140,8 +140,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
if (FDKWEIGHT) {
// The correct factor here is this one:
// Z[idx] += (fr*fCd.w)*(fr*fCd.w)*fVal;
- // This is the square of the inverse magnification factor
- // from fX,fY,fZ to the detector.
+ // This is the square of the magnification factor
+ // from fX,fY,fZ to the virtual detector, where the
+ // virtual detector is the plane through the origin
+ // parallel to the detector, so spanned by u,v.
// Since we are assuming we have a circular cone
// beam trajectory, fCd.w is constant, and we instead
diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index 1e81390..939a026 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -309,8 +309,7 @@ void CCudaReconstructionAlgorithm2D::run(int _iNrIterations)
m_bAlgoInit = true;
}
- float fPixelSize = volgeom.getPixelLengthX();
- float fSinogramScale = 1.0f/(fPixelSize*fPixelSize);
+ float fSinogramScale = 1.0f;
ok = m_pAlgo->copyDataToGPU(m_pSinogram->getDataConst(), m_pSinogram->getGeometry()->getDetectorCount(), fSinogramScale,
m_pReconstruction->getDataConst(), volgeom.getGridColCount(),
diff --git a/tests/python/test_line2d.py b/tests/python/test_line2d.py
index 4c3d174..8b6e200 100644
--- a/tests/python/test_line2d.py
+++ b/tests/python/test_line2d.py
@@ -586,10 +586,66 @@ class Test2DKernel(unittest.TestCase):
else:
raise RuntimeError("Unsupported projector")
+ def single_test_adjoint(self, type, proj_type):
+ shape = np.random.randint(*range2d, size=2)
+ if FLEXVOL:
+ if not NONSQUARE:
+ pixsize = np.array([0.5, 0.5]) + np.random.random()
+ else:
+ pixsize = 0.5 + np.random.random(size=2)
+ origin = 10 * np.random.random(size=2)
+ else:
+ pixsize = (1.,1.)
+ origin = (0.,0.)
+ vg = astra.create_vol_geom(shape[1], shape[0],
+ origin[0] - 0.5 * shape[0] * pixsize[0],
+ origin[0] + 0.5 * shape[0] * pixsize[0],
+ origin[1] - 0.5 * shape[1] * pixsize[1],
+ origin[1] + 0.5 * shape[1] * pixsize[1])
+
+ if type == 'parallel':
+ pg = gen_random_geometry_parallel()
+ projector_id = astra.create_projector(proj_type, pg, vg)
+ elif type == 'parallel_vec':
+ pg = gen_random_geometry_parallel_vec()
+ projector_id = astra.create_projector(proj_type, pg, vg)
+ elif type == 'fanflat':
+ pg = gen_random_geometry_fanflat()
+ projector_id = astra.create_projector(proj_type_to_fan(proj_type), pg, vg)
+ elif type == 'fanflat_vec':
+ pg = gen_random_geometry_fanflat_vec()
+ projector_id = astra.create_projector(proj_type_to_fan(proj_type), pg, vg)
+
+ for i in range(5):
+ X = np.random.random((shape[1], shape[0]))
+ Y = np.random.random(astra.geom_size(pg))
+
+ sinogram_id, fX = astra.create_sino(X, projector_id)
+ bp_id, fTY = astra.create_backprojection(Y, projector_id)
+
+ astra.data2d.delete(sinogram_id)
+ astra.data2d.delete(bp_id)
+
+ da = np.dot(fX.ravel(), Y.ravel())
+ db = np.dot(X.ravel(), fTY.ravel())
+ m = np.abs(da - db)
+ TOL = 1e-3 if 'cuda' not in proj_type else 1e-1
+ if m / da >= TOL:
+ print(vg)
+ print(pg)
+ print(m/da, da/db, da, db)
+ self.assertTrue(m / da < TOL)
+ astra.projector.delete(projector_id)
+
+
def multi_test(self, type, proj_type):
np.random.seed(seed)
for _ in range(nloops):
self.single_test(type, proj_type)
+ def multi_test_adjoint(self, type, proj_type):
+ np.random.seed(seed)
+ for _ in range(nloops):
+ self.single_test_adjoint(type, proj_type)
__combinations = { 'parallel': [ 'line', 'linear', 'distance_driven', 'strip', 'cuda' ],
'parallel_vec': [ 'line', 'linear', 'distance_driven', 'strip', 'cuda' ],
@@ -600,7 +656,10 @@ for k, l in __combinations.items():
for v in l:
def f(k,v):
return lambda self: self.multi_test(k, v)
+ def f_adj(k,v):
+ return lambda self: self.multi_test_adjoint(k, v)
setattr(Test2DKernel, 'test_' + k + '_' + v, f(k,v))
+ setattr(Test2DKernel, 'test_' + k + '_' + v + '_adjoint', f_adj(k,v))
if __name__ == '__main__':
unittest.main()