summaryrefslogtreecommitdiffstats
path: root/cuda/2d
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2019-09-27 15:16:26 +0200
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2019-09-27 15:16:26 +0200
commit54af7e8e22a3f1c9d90b13291b28d39778c05564 (patch)
tree260310b16d624261bb80f82979af27750022259b /cuda/2d
parent1fec36f7ccadd5f7dcf2bb59b0654dc53653b0f3 (diff)
parentb629db207bb263495bfff2e61ce189ccac27b4b9 (diff)
downloadastra-54af7e8e22a3f1c9d90b13291b28d39778c05564.tar.gz
astra-54af7e8e22a3f1c9d90b13291b28d39778c05564.tar.bz2
astra-54af7e8e22a3f1c9d90b13291b28d39778c05564.tar.xz
astra-54af7e8e22a3f1c9d90b13291b28d39778c05564.zip
Merge branch 'consistent_scaling'
Diffstat (limited to 'cuda/2d')
-rw-r--r--cuda/2d/algo.cu19
-rw-r--r--cuda/2d/astra.cu3
-rw-r--r--cuda/2d/cgls.cu65
-rw-r--r--cuda/2d/em.cu65
-rw-r--r--cuda/2d/fan_bp.cu329
-rw-r--r--cuda/2d/fan_fp.cu85
-rw-r--r--cuda/2d/fbp.cu20
-rw-r--r--cuda/2d/fft.cu207
-rw-r--r--cuda/2d/par_bp.cu77
-rw-r--r--cuda/2d/par_fp.cu76
-rw-r--r--cuda/2d/sart.cu5
-rw-r--r--cuda/2d/sirt.cu63
12 files changed, 175 insertions, 839 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index b4c2864..be15b25 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -134,8 +134,8 @@ bool ReconAlgo::setGeometry(const astra::CVolumeGeometry2D* pVolGeom,
delete[] fanProjs;
fanProjs = 0;
- fOutputScale = 1.0f;
- ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fOutputScale);
+ fProjectorScale = 1.0f;
+ ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fProjectorScale);
if (!ok)
return false;
@@ -242,7 +242,7 @@ bool ReconAlgo::allocateBuffers()
return true;
}
-bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPitch, float fSinogramScale,
+bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPitch,
const float* pfReconstruction, unsigned int iReconstructionPitch,
const float* pfVolMask, unsigned int iVolMaskPitch,
const float* pfSinoMask, unsigned int iSinoMaskPitch)
@@ -258,11 +258,6 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit
if (!ok)
return false;
- // rescale sinogram to adjust for pixel size
- processSino<opMul>(D_sinoData, fSinogramScale,
- //1.0f/(fPixelSize*fPixelSize),
- sinoPitch, dims);
-
ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch,
dims,
D_volumeData, volumePitch);
@@ -316,11 +311,11 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
if (parProjs) {
assert(!fanProjs);
return FP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, parProjs, fOutputScale * outputScale);
+ dims, parProjs, fProjectorScale * outputScale);
} else {
assert(fanProjs);
return FanFP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs, fOutputScale * outputScale);
+ dims, fanProjs, fProjectorScale * outputScale);
}
}
@@ -331,11 +326,11 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
if (parProjs) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, parProjs, outputScale);
+ dims, parProjs, fProjectorScale * outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs, outputScale);
+ dims, fanProjs, fProjectorScale * outputScale);
}
}
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index ec03517..7ff1c95 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -302,7 +302,8 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom,
pProjs[i].scale(factor);
}
// CHECKME: Check factor
- fOutputScale *= pVolGeom->getPixelLengthX() * pVolGeom->getPixelLengthY();
+ // NB: Only valid for square pixels
+ fOutputScale *= pVolGeom->getPixelLengthX();
return true;
}
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index b6a9fae..e7238b9 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -29,10 +29,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
@@ -102,14 +98,14 @@ bool CGLS::setBuffers(float* _D_volumeData, unsigned int _volumePitch,
return true;
}
-bool CGLS::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPitch, float fSinogramScale,
+bool CGLS::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPitch,
const float* pfReconstruction, unsigned int iReconstructionPitch,
const float* pfVolMask, unsigned int iVolMaskPitch,
const float* pfSinoMask, unsigned int iSinoMaskPitch)
{
sliceInitialized = false;
- return ReconAlgo::copyDataToGPU(pfSinogram, iSinogramPitch, fSinogramScale, pfReconstruction, iReconstructionPitch, pfVolMask, iVolMaskPitch, pfSinoMask, iSinoMaskPitch);
+ return ReconAlgo::copyDataToGPU(pfSinogram, iSinogramPitch, pfReconstruction, iReconstructionPitch, pfVolMask, iVolMaskPitch, pfSinoMask, iSinoMaskPitch);
}
bool CGLS::iterate(unsigned int iterations)
@@ -206,60 +202,3 @@ float CGLS::computeDiffNorm()
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_sinoData;
-
- SDimensions dims;
- dims.iVolWidth = 1024;
- dims.iVolHeight = 1024;
- dims.iProjAngles = 512;
- dims.iProjDets = 1536;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
- unsigned int volumePitch, sinoPitch;
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- zeroVolume(D_volumeData, volumePitch, dims.iVolWidth, dims.iVolHeight);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
- zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles);
- printf("pitch: %u\n", sinoPitch);
-
- unsigned int y, x;
- float* sino = loadImage("sino.png", y, x);
-
- float* img = new float[dims.iVolWidth*dims.iVolHeight];
-
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_sinoData, sinoPitch);
-
- float* angle = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i)
- angle[i] = i*(M_PI/dims.iProjAngles);
-
- CGLS cgls;
-
- cgls.setGeometry(dims, angle);
- cgls.init();
-
- cgls.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
-
- cgls.iterate(25);
-
- delete[] angle;
-
- copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
-
- saveImage("vol.png",dims.iVolHeight,dims.iVolWidth,img);
-
- return 0;
-}
-#endif
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index aa272d8..df140ec 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -29,10 +29,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
@@ -168,64 +164,3 @@ float EM::computeDiffNorm()
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_sinoData;
-
- SDimensions dims;
- dims.iVolWidth = 1024;
- dims.iVolHeight = 1024;
- dims.iProjAngles = 512;
- dims.iProjDets = 1536;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
- unsigned int volumePitch, sinoPitch;
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- zeroVolume(D_volumeData, volumePitch, dims.iVolWidth, dims.iVolHeight);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
- zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles);
- printf("pitch: %u\n", sinoPitch);
-
- unsigned int y, x;
- float* sino = loadImage("sino.png", y, x);
-
- float* img = new float[dims.iVolWidth*dims.iVolHeight];
-
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_sinoData, sinoPitch);
-
- float* angle = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i)
- angle[i] = i*(M_PI/dims.iProjAngles);
-
- EM em;
-
- em.setGeometry(dims, angle);
- em.init();
-
- // TODO: Initialize D_volumeData with an unfiltered backprojection
-
- em.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
-
- em.iterate(25);
-
-
- delete[] angle;
-
- copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
-
- saveImage("vol.png",dims.iVolHeight,dims.iVolWidth,img);
-
- return 0;
-}
-
-#endif
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index dac3ac2..76d2fb9 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -28,10 +28,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
#include <iostream>
@@ -50,12 +46,16 @@ const unsigned int g_blockSlices = 16;
const unsigned int g_MaxAngles = 2560;
-__constant__ float gC_SrcX[g_MaxAngles];
-__constant__ float gC_SrcY[g_MaxAngles];
-__constant__ float gC_DetSX[g_MaxAngles];
-__constant__ float gC_DetSY[g_MaxAngles];
-__constant__ float gC_DetUX[g_MaxAngles];
-__constant__ float gC_DetUY[g_MaxAngles];
+struct DevFanParams {
+ float fNumC;
+ float fNumX;
+ float fNumY;
+ float fDenC;
+ float fDenX;
+ float fDenY;
+};
+
+__constant__ DevFanParams gC_C[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
@@ -74,6 +74,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
+template<bool FBPWEIGHT>
__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
@@ -96,25 +97,25 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
-
for (int angle = startAngle; angle < endAngle; ++angle)
{
- const float fSrcX = gC_SrcX[angle];
- const float fSrcY = gC_SrcY[angle];
- const float fDetSX = gC_DetSX[angle];
- const float fDetSY = gC_DetSY[angle];
- const float fDetUX = gC_DetUX[angle];
- const float fDetUY = gC_DetUY[angle];
-
- const float fXD = fSrcX - fX;
- const float fYD = fSrcY - fY;
-
- const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
- const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA);
+ const float fNumC = gC_C[angle].fNumC;
+ const float fNumX = gC_C[angle].fNumX;
+ const float fNumY = gC_C[angle].fNumY;
+ const float fDenX = gC_C[angle].fDenX;
+ const float fDenY = gC_C[angle].fDenY;
+
+ const float fNum = fNumC + fNumX * fX + fNumY * fY;
+ const float fDen = (FBPWEIGHT ? 1.0 : gC_C[angle].fDenC) + fDenX * fX + fDenY * fY;
+
+ // Scale factor is the approximate number of rays traversing this pixel,
+ // given by the inverse size of a detector pixel scaled by the magnification
+ // factor of this pixel.
+ // Magnification factor is || u (d-s) || / || u (x-s) ||
+
+ const float fr = __fdividef(1.0f, fDen);
+ const float fT = fNum * fr;
+ fVal += tex2D(gT_FanProjTexture, fT, fA) * (FBPWEIGHT ? fr * fr : fr);
fA += 1.0f;
}
@@ -148,30 +149,27 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float fVal = 0.0f;
float fA = startAngle + 0.5f;
- // TODO: Distance correction?
-
for (int angle = startAngle; angle < endAngle; ++angle)
{
- const float fSrcX = gC_SrcX[angle];
- const float fSrcY = gC_SrcY[angle];
- const float fDetSX = gC_DetSX[angle];
- const float fDetSY = gC_DetSY[angle];
- const float fDetUX = gC_DetUX[angle];
- const float fDetUY = gC_DetUY[angle];
+ const float fNumC = gC_C[angle].fNumC;
+ const float fNumX = gC_C[angle].fNumX;
+ const float fNumY = gC_C[angle].fNumY;
+ const float fDenC = gC_C[angle].fDenC;
+ const float fDenX = gC_C[angle].fDenX;
+ const float fDenY = gC_C[angle].fDenY;
// TODO: Optimize these loops...
float fX = fXb;
for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
float fY = fYb;
for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
- const float fXD = fSrcX - fX;
- const float fYD = fSrcY - fY;
-
- const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
- const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA);
+
+ const float fNum = fNumC + fNumX * fX + fNumY * fY;
+ const float fDen = fDenC + fDenX * fX + fDenY * fY;
+ const float fr = __fdividef(1.0f, fDen);
+
+ const float fT = fNum * fr;
+ fVal += tex2D(gT_FanProjTexture, fT, fA) * fr;
fY -= fSubStep;
}
fX += fSubStep;
@@ -202,77 +200,97 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
float* volData = (float*)D_volData;
- // TODO: Distance correction?
-
- // TODO: Constant memory vs parameters.
- const float fSrcX = gC_SrcX[0];
- const float fSrcY = gC_SrcY[0];
- const float fDetSX = gC_DetSX[0];
- const float fDetSY = gC_DetSY[0];
- const float fDetUX = gC_DetUX[0];
- const float fDetUY = gC_DetUY[0];
+ const float fNumC = gC_C[0].fNumC;
+ const float fNumX = gC_C[0].fNumX;
+ const float fNumY = gC_C[0].fNumY;
+ const float fDenC = gC_C[0].fDenC;
+ const float fDenX = gC_C[0].fDenX;
+ const float fDenY = gC_C[0].fDenY;
- const float fXD = fSrcX - fX;
- const float fYD = fSrcY - fY;
+ const float fNum = fNumC + fNumX * fX + fNumY * fY;
+ const float fDen = fDenC + fDenX * fX + fDenY * fY;
- const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
- const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fT = fNum / fDen;
+ const float fr = __fdividef(1.0f, fDen);
+ const float fT = fNum * fr;
+ // NB: The scale constant in devBP is cancelled out by the SART weighting
const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
volData[Y*volPitch+X] += fVal * fOutputScale;
}
-// Weighted BP for use in fan beam FBP
-// Each pixel/ray is weighted by 1/L^2 where L is the distance to the source.
-__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
+struct Vec2 {
+ double x;
+ double y;
+ Vec2(double x_, double y_) : x(x_), y(y_) { }
+ Vec2 operator+(const Vec2 &b) const {
+ return Vec2(x + b.x, y + b.y);
+ }
+ Vec2 operator-(const Vec2 &b) const {
+ return Vec2(x - b.x, y - b.y);
+ }
+ Vec2 operator-() const {
+ return Vec2(-x, -y);
+ }
+ double norm() const {
+ return sqrt(x*x + y*y);
+ }
+};
+
+double det2(const Vec2 &a, const Vec2 &b) {
+ return a.x * b.y - a.y * b.x;
+}
+
+
+bool transferConstants(const SFanProjection* angles, unsigned int iProjAngles, bool FBP)
{
- const int relX = threadIdx.x;
- const int relY = threadIdx.y;
+ DevFanParams *p = new DevFanParams[iProjAngles];
- int endAngle = startAngle + g_anglesPerBlock;
- if (endAngle > dims.iProjAngles)
- endAngle = dims.iProjAngles;
- const int X = blockIdx.x * g_blockSlices + relX;
- const int Y = blockIdx.y * g_blockSliceSize + relY;
+ // We need three values in the kernel:
+ // projected coordinates of pixels on the detector:
+ // || x (s-d) || + ||s d|| / || u (s-x) ||
- if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
- return;
+ // ray density weighting factor for the adjoint
+ // || u (s-d) || / ( |u| * || u (s-x) || )
- const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f );
- const float fY = - ( Y - 0.5f*dims.iVolHeight + 0.5f );
+ // fan-beam FBP weighting factor
+ // ( || u s || / || u (s-x) || ) ^ 2
- float* volData = (float*)D_volData;
- float fVal = 0.0f;
- float fA = startAngle + 0.5f;
- // TODO: Distance correction?
+ for (unsigned int i = 0; i < iProjAngles; ++i) {
+ Vec2 u(angles[i].fDetUX, angles[i].fDetUY);
+ Vec2 s(angles[i].fSrcX, angles[i].fSrcY);
+ Vec2 d(angles[i].fDetSX, angles[i].fDetSY);
- for (int angle = startAngle; angle < endAngle; ++angle)
- {
- const float fSrcX = gC_SrcX[angle];
- const float fSrcY = gC_SrcY[angle];
- const float fDetSX = gC_DetSX[angle];
- const float fDetSY = gC_DetSY[angle];
- const float fDetUX = gC_DetUX[angle];
- const float fDetUY = gC_DetUY[angle];
-
- const float fXD = fSrcX - fX;
- const float fYD = fSrcY - fY;
-
- const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;
- const float fDen = fDetUX * fYD - fDetUY * fXD;
-
- const float fWeight = fXD*fXD + fYD*fYD;
-
- const float fT = fNum / fDen;
- fVal += tex2D(gT_FanProjTexture, fT, fA) / fWeight;
- fA += 1.0f;
+
+
+ double fScale;
+ if (!FBP) {
+ // goal: 1/fDen = || u (s-d) || / ( |u| * || u (s-x) || )
+ // fDen = ( |u| * || u (s-x) || ) / || u (s-d) ||
+ // i.e. scale = |u| / || u (s-d) ||
+
+ fScale = u.norm() / det2(u, s-d);
+ } else {
+ // goal: 1/fDen = || u s || / || u (s-x) ||
+ // fDen = || u (s-x) || / || u s ||
+ // i.e., scale = 1 / || u s ||
+
+ fScale = 1.0 / det2(u, s);
+ }
+
+ p[i].fNumC = fScale * det2(s,d);
+ p[i].fNumX = fScale * (s-d).y;
+ p[i].fNumY = -fScale * (s-d).x;
+ p[i].fDenC = fScale * det2(u, s); // == 1.0 for FBP
+ p[i].fDenX = fScale * u.y;
+ p[i].fDenY = -fScale * u.x;
}
- volData[Y*volPitch+X] += fVal * fOutputScale;
+ // TODO: Check for errors
+ cudaMemcpyToSymbol(gC_C, p, iProjAngles*sizeof(DevFanParams), 0, cudaMemcpyHostToDevice);
+
+ return true;
}
@@ -285,21 +303,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
- // transfer angles to constant memory
- float* tmp = new float[dims.iProjAngles];
-
-#define TRANSFER_TO_CONSTANT(name) do { for (unsigned int i = 0; i < dims.iProjAngles; ++i) tmp[i] = angles[i].f##name ; cudaMemcpyToSymbol(gC_##name, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); } while (0)
-
- TRANSFER_TO_CONSTANT(SrcX);
- TRANSFER_TO_CONSTANT(SrcY);
- TRANSFER_TO_CONSTANT(DetSX);
- TRANSFER_TO_CONSTANT(DetSY);
- TRANSFER_TO_CONSTANT(DetUX);
- TRANSFER_TO_CONSTANT(DetUY);
-
-#undef TRANSFER_TO_CONSTANT
-
- delete[] tmp;
+ bool ok = transferConstants(angles, dims.iProjAngles, false);
+ if (!ok)
+ return false;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -312,7 +318,7 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
if (dims.iRaysPerPixelDim > 1)
devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
else
- devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
+ devFanBP<false><<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -332,21 +338,9 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
- // transfer angles to constant memory
- float* tmp = new float[dims.iProjAngles];
-
-#define TRANSFER_TO_CONSTANT(name) do { for (unsigned int i = 0; i < dims.iProjAngles; ++i) tmp[i] = angles[i].f##name ; cudaMemcpyToSymbol(gC_##name, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); } while (0)
-
- TRANSFER_TO_CONSTANT(SrcX);
- TRANSFER_TO_CONSTANT(SrcY);
- TRANSFER_TO_CONSTANT(DetSX);
- TRANSFER_TO_CONSTANT(DetSY);
- TRANSFER_TO_CONSTANT(DetUX);
- TRANSFER_TO_CONSTANT(DetUY);
-
-#undef TRANSFER_TO_CONSTANT
-
- delete[] tmp;
+ bool ok = transferConstants(angles, dims.iProjAngles, true);
+ if (!ok)
+ return false;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -356,7 +350,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
cudaStreamCreate(&stream);
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
+ devFanBP<true><<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -377,17 +371,9 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
// only one angle
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
- // transfer angle to constant memory
-#define TRANSFER_TO_CONSTANT(name) do { cudaMemcpyToSymbol(gC_##name, &(angles[angle].f##name), sizeof(float), 0, cudaMemcpyHostToDevice); } while (0)
-
- TRANSFER_TO_CONSTANT(SrcX);
- TRANSFER_TO_CONSTANT(SrcY);
- TRANSFER_TO_CONSTANT(DetSX);
- TRANSFER_TO_CONSTANT(DetSY);
- TRANSFER_TO_CONSTANT(DetUX);
- TRANSFER_TO_CONSTANT(DetUY);
-
-#undef TRANSFER_TO_CONSTANT
+ bool ok = transferConstants(angles + angle, 1, false);
+ if (!ok)
+ return false;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -448,66 +434,3 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_projData;
-
- SDimensions dims;
- dims.iVolWidth = 128;
- dims.iVolHeight = 128;
- dims.iProjAngles = 180;
- dims.iProjDets = 256;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
- unsigned int volumePitch, projPitch;
-
- SFanProjection projs[180];
-
- projs[0].fSrcX = 0.0f;
- projs[0].fSrcY = 1536.0f;
- projs[0].fDetSX = 128.0f;
- projs[0].fDetSY = -512.0f;
- projs[0].fDetUX = -1.0f;
- projs[0].fDetUY = 0.0f;
-
-#define ROTATE0(name,i,alpha) do { projs[i].f##name##X = projs[0].f##name##X * cos(alpha) - projs[0].f##name##Y * sin(alpha); projs[i].f##name##Y = projs[0].f##name##X * sin(alpha) + projs[0].f##name##Y * cos(alpha); } while(0)
-
- for (int i = 1; i < 180; ++i) {
- ROTATE0(Src, i, i*2*M_PI/180);
- ROTATE0(DetS, i, i*2*M_PI/180);
- ROTATE0(DetU, i, i*2*M_PI/180);
- }
-
-#undef ROTATE0
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
- printf("pitch: %u\n", projPitch);
-
- unsigned int y, x;
- float* sino = loadImage("sino.png", y, x);
-
- float* img = new float[dims.iVolWidth*dims.iVolHeight];
-
- memset(img, 0, dims.iVolWidth*dims.iVolHeight*sizeof(float));
-
- copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
-
- FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
-
- copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
-
- saveImage("vol.png",dims.iVolHeight,dims.iVolWidth,img);
-
- return 0;
-}
-#endif
diff --git a/cuda/2d/fan_fp.cu b/cuda/2d/fan_fp.cu
index 3479650..60c02f8 100644
--- a/cuda/2d/fan_fp.cu
+++ b/cuda/2d/fan_fp.cu
@@ -28,10 +28,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
#include <iostream>
@@ -308,84 +304,3 @@ bool FanFP(float* D_volumeData, unsigned int volumePitch,
}
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_projData;
-
- SDimensions dims;
- dims.iVolWidth = 128;
- dims.iVolHeight = 128;
- dims.iProjAngles = 180;
- dims.iProjDets = 256;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
- unsigned int volumePitch, projPitch;
-
- SFanProjection projs[180];
-
- projs[0].fSrcX = 0.0f;
- projs[0].fSrcY = 1536.0f;
- projs[0].fDetSX = 128.0f;
- projs[0].fDetSY = -512.0f;
- projs[0].fDetUX = -1.0f;
- projs[0].fDetUY = 0.0f;
-
-#define ROTATE0(name,i,alpha) do { projs[i].f##name##X = projs[0].f##name##X * cos(alpha) - projs[0].f##name##Y * sin(alpha); projs[i].f##name##Y = projs[0].f##name##X * sin(alpha) + projs[0].f##name##Y * cos(alpha); } while(0)
-
- for (int i = 1; i < 180; ++i) {
- ROTATE0(Src, i, i*2*M_PI/180);
- ROTATE0(DetS, i, i*2*M_PI/180);
- ROTATE0(DetU, i, i*2*M_PI/180);
- }
-
-#undef ROTATE0
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
- printf("pitch: %u\n", projPitch);
-
- unsigned int y, x;
- float* img = loadImage("phantom128.png", y, x);
-
- float* sino = new float[dims.iProjAngles * dims.iProjDets];
-
- memset(sino, 0, dims.iProjAngles * dims.iProjDets * sizeof(float));
-
- copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
-
- float* angle = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i)
- angle[i] = i*(M_PI/dims.iProjAngles);
-
- FanFP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
-
- delete[] angle;
-
- copySinogramFromDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
-
- float s = 0.0f;
- for (unsigned int y = 0; y < dims.iProjAngles; ++y)
- for (unsigned int x = 0; x < dims.iProjDets; ++x)
- s += sino[y*dims.iProjDets+x] * sino[y*dims.iProjDets+x];
- printf("cpu norm: %f\n", s);
-
- //zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
- s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
- printf("gpu norm: %f\n", s);
-
- saveImage("sino.png",dims.iProjAngles,dims.iProjDets,sino);
-
-
- return 0;
-}
-#endif
diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu
index a5b8a7a..4fc3983 100644
--- a/cuda/2d/fbp.cu
+++ b/cuda/2d/fbp.cu
@@ -58,7 +58,8 @@ int FBP::calcFourierFilterSize(int _iDetectorCount)
FBP::FBP() : ReconAlgo()
{
D_filter = 0;
-
+ m_bShortScan = false;
+ fReconstructionScale = 1.0f;
}
FBP::~FBP()
@@ -72,6 +73,8 @@ void FBP::reset()
freeComplexOnDevice((cufftComplex *)D_filter);
D_filter = 0;
}
+ m_bShortScan = false;
+ fReconstructionScale = 1.0f;
}
bool FBP::init()
@@ -79,6 +82,12 @@ bool FBP::init()
return true;
}
+bool FBP::setReconstructionScale(float fScale)
+{
+ fReconstructionScale = fScale;
+ return true;
+}
+
bool FBP::setFilter(const astra::SFilterConfig &_cfg)
{
if (D_filter)
@@ -292,7 +301,7 @@ bool FBP::iterate(unsigned int iterations)
astraCUDA3d::FDK_PreWeight(tmp, fOriginSource,
fOriginDetector, 0.0f,
- fFanDetSize, 1.0f, /* fPixelSize */ 1.0f,
+ fFanDetSize, 1.0f,
m_bShortScan, dims3d, pfAngles);
} else {
// TODO: How should different detector pixel size in different
@@ -319,17 +328,14 @@ bool FBP::iterate(unsigned int iterations)
}
if (fanProjs) {
- float fOutputScale = 1.0 / (/*fPixelSize * fPixelSize * fPixelSize * */ fFanDetSize * fFanDetSize);
-
- ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fOutputScale);
+ ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fProjectorScale * fReconstructionScale);
} else {
// scale by number of angles. For the fan-beam case, this is already
// handled by FDK_PreWeight
float fOutputScale = (M_PI / 2.0f) / (float)dims.iProjAngles;
- //fOutputScale /= fDetSize * fDetSize;
- ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale);
+ ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale * fProjectorScale * fReconstructionScale);
}
if(!ok)
{
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 2e94b79..8361ad2 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -314,210 +314,3 @@ void genCuFFTFilter(const SFilterConfig &_cfg, int _iProjectionCount,
}
-
-
-#ifdef STANDALONE
-
-__global__ static void doubleFourierOutput_kernel(int _iProjectionCount,
- int _iDetectorCount,
- cufftComplex* _pFourierOutput)
-{
- int iIndex = threadIdx.x + blockIdx.x * blockDim.x;
- int iProjectionIndex = iIndex / _iDetectorCount;
- int iDetectorIndex = iIndex % _iDetectorCount;
-
- if(iProjectionIndex >= _iProjectionCount)
- {
- return;
- }
-
- if(iDetectorIndex <= (_iDetectorCount / 2))
- {
- return;
- }
-
- int iOtherDetectorIndex = _iDetectorCount - iDetectorIndex;
-
- _pFourierOutput[iProjectionIndex * _iDetectorCount + iDetectorIndex].x = _pFourierOutput[iProjectionIndex * _iDetectorCount + iOtherDetectorIndex].x;
- _pFourierOutput[iProjectionIndex * _iDetectorCount + iDetectorIndex].y = -_pFourierOutput[iProjectionIndex * _iDetectorCount + iOtherDetectorIndex].y;
-}
-
-static void doubleFourierOutput(int _iProjectionCount, int _iDetectorCount,
- cufftComplex * _pFourierOutput)
-{
- const int iBlockSize = 256;
- int iElementCount = _iProjectionCount * _iDetectorCount;
- int iBlockCount = (iElementCount + iBlockSize - 1) / iBlockSize;
-
- doubleFourierOutput_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,
- _iDetectorCount,
- _pFourierOutput);
- CHECK_ERROR("doubleFourierOutput_kernel failed");
-}
-
-
-
-static void writeToMatlabFile(const char * _fileName, const float * _pfData,
- int _iRowCount, int _iColumnCount)
-{
- std::ofstream out(_fileName);
-
- for(int iRowIndex = 0; iRowIndex < _iRowCount; iRowIndex++)
- {
- for(int iColumnIndex = 0; iColumnIndex < _iColumnCount; iColumnIndex++)
- {
- out << _pfData[iColumnIndex + iRowIndex * _iColumnCount] << " ";
- }
-
- out << std::endl;
- }
-}
-
-static void convertComplexToRealImg(const cufftComplex * _pComplex,
- int _iElementCount,
- float * _pfReal, float * _pfImaginary)
-{
- for(int iIndex = 0; iIndex < _iElementCount; iIndex++)
- {
- _pfReal[iIndex] = _pComplex[iIndex].x;
- _pfImaginary[iIndex] = _pComplex[iIndex].y;
- }
-}
-
-void testCudaFFT()
-{
- const int iProjectionCount = 2;
- const int iDetectorCount = 1024;
- const int iTotalElementCount = iProjectionCount * iDetectorCount;
-
- float * pfHostProj = new float[iTotalElementCount];
- memset(pfHostProj, 0, sizeof(float) * iTotalElementCount);
-
- for(int iProjectionIndex = 0; iProjectionIndex < iProjectionCount; iProjectionIndex++)
- {
- for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
- {
-// int
-
-// pfHostProj[iIndex] = (float)rand() / (float)RAND_MAX;
- }
- }
-
- writeToMatlabFile("proj.mat", pfHostProj, iProjectionCount, iDetectorCount);
-
- float * pfDevProj = NULL;
- SAFE_CALL(cudaMalloc((void **)&pfDevProj, sizeof(float) * iTotalElementCount));
- SAFE_CALL(cudaMemcpy(pfDevProj, pfHostProj, sizeof(float) * iTotalElementCount, cudaMemcpyHostToDevice));
-
- cufftComplex * pDevFourProj = NULL;
- SAFE_CALL(cudaMalloc((void **)&pDevFourProj, sizeof(cufftComplex) * iTotalElementCount));
-
- cufftHandle plan;
- cufftResult result;
-
- result = cufftPlan1d(&plan, iDetectorCount, CUFFT_R2C, iProjectionCount);
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to plan 1d r2c fft");
- }
-
- result = cufftExecR2C(plan, pfDevProj, pDevFourProj);
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to exec 1d r2c fft");
- }
-
- cufftDestroy(plan);
-
- doubleFourierOutput(iProjectionCount, iDetectorCount, pDevFourProj);
-
- cufftComplex * pHostFourProj = new cufftComplex[iTotalElementCount];
- SAFE_CALL(cudaMemcpy(pHostFourProj, pDevFourProj, sizeof(cufftComplex) * iTotalElementCount, cudaMemcpyDeviceToHost));
-
- float * pfHostFourProjReal = new float[iTotalElementCount];
- float * pfHostFourProjImaginary = new float[iTotalElementCount];
-
- convertComplexToRealImg(pHostFourProj, iTotalElementCount, pfHostFourProjReal, pfHostFourProjImaginary);
-
- writeToMatlabFile("proj_four_real.mat", pfHostFourProjReal, iProjectionCount, iDetectorCount);
- writeToMatlabFile("proj_four_imaginary.mat", pfHostFourProjImaginary, iProjectionCount, iDetectorCount);
-
- float * pfDevInFourProj = NULL;
- SAFE_CALL(cudaMalloc((void **)&pfDevInFourProj, sizeof(float) * iTotalElementCount));
-
- result = cufftPlan1d(&plan, iDetectorCount, CUFFT_C2R, iProjectionCount);
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to plan 1d c2r fft");
- }
-
- result = cufftExecC2R(plan, pDevFourProj, pfDevInFourProj);
- if(result != CUFFT_SUCCESS)
- {
- ASTRA_ERROR("Failed to exec 1d c2r fft");
- }
-
- cufftDestroy(plan);
-
- rescaleInverseFourier(iProjectionCount, iDetectorCount, pfDevInFourProj);
-
- float * pfHostInFourProj = new float[iTotalElementCount];
- SAFE_CALL(cudaMemcpy(pfHostInFourProj, pfDevInFourProj, sizeof(float) * iTotalElementCount, cudaMemcpyDeviceToHost));
-
- writeToMatlabFile("in_four.mat", pfHostInFourProj, iProjectionCount, iDetectorCount);
-
- SAFE_CALL(cudaFree(pDevFourProj));
- SAFE_CALL(cudaFree(pfDevProj));
-
- delete [] pfHostInFourProj;
- delete [] pfHostFourProjReal;
- delete [] pfHostFourProjImaginary;
- delete [] pfHostProj;
- delete [] pHostFourProj;
-}
-
-void downloadDebugFilterComplex(float * _pfHostSinogram, int _iProjectionCount,
- int _iDetectorCount,
- cufftComplex * _pDevFilter,
- int _iFilterDetCount)
-{
- cufftComplex * pHostFilter = NULL;
- size_t complMemSize = sizeof(cufftComplex) * _iFilterDetCount * _iProjectionCount;
- pHostFilter = (cufftComplex *)malloc(complMemSize);
- SAFE_CALL(cudaMemcpy(pHostFilter, _pDevFilter, complMemSize, cudaMemcpyDeviceToHost));
-
- for(int iTargetProjIndex = 0; iTargetProjIndex < _iProjectionCount; iTargetProjIndex++)
- {
- for(int iTargetDetIndex = 0; iTargetDetIndex < min(_iDetectorCount, _iFilterDetCount); iTargetDetIndex++)
- {
- cufftComplex source = pHostFilter[iTargetDetIndex + iTargetProjIndex * _iFilterDetCount];
- float fReadValue = sqrtf(source.x * source.x + source.y * source.y);
- _pfHostSinogram[iTargetDetIndex + iTargetProjIndex * _iDetectorCount] = fReadValue;
- }
- }
-
- free(pHostFilter);
-}
-
-void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount,
- int _iDetectorCount, float * _pfDevFilter,
- int _iFilterDetCount)
-{
- float * pfHostFilter = NULL;
- size_t memSize = sizeof(float) * _iFilterDetCount * _iProjectionCount;
- pfHostFilter = (float *)malloc(memSize);
- SAFE_CALL(cudaMemcpy(pfHostFilter, _pfDevFilter, memSize, cudaMemcpyDeviceToHost));
-
- for(int iTargetProjIndex = 0; iTargetProjIndex < _iProjectionCount; iTargetProjIndex++)
- {
- for(int iTargetDetIndex = 0; iTargetDetIndex < min(_iDetectorCount, _iFilterDetCount); iTargetDetIndex++)
- {
- float fSource = pfHostFilter[iTargetDetIndex + iTargetProjIndex * _iFilterDetCount];
- _pfHostSinogram[iTargetDetIndex + iTargetProjIndex * _iDetectorCount] = fSource;
- }
- }
-
- free(pfHostFilter);
-}
-
-#endif
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 09a6554..f080abb 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -28,10 +28,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
#include <iostream>
@@ -53,6 +49,7 @@ const unsigned int g_MaxAngles = 2560;
__constant__ float gC_angle_scaled_sin[g_MaxAngles];
__constant__ float gC_angle_scaled_cos[g_MaxAngles];
__constant__ float gC_angle_offset[g_MaxAngles];
+__constant__ float gC_angle_scale[g_MaxAngles];
static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
{
@@ -70,6 +67,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
+// TODO: Templated version with/without scale? (Or only the global outputscale)
__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
@@ -97,9 +95,10 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
const float scaled_cos_theta = gC_angle_scaled_cos[angle];
const float scaled_sin_theta = gC_angle_scaled_sin[angle];
const float TOffset = gC_angle_offset[angle];
+ const float scale = gC_angle_scale[angle];
const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset;
- fVal += tex2D(gT_projTexture, fT, fA);
+ fVal += tex2D(gT_projTexture, fT, fA) * scale;
fA += 1.0f;
}
@@ -138,6 +137,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
const float cos_theta = gC_angle_scaled_cos[angle];
const float sin_theta = gC_angle_scaled_sin[angle];
const float TOffset = gC_angle_offset[angle];
+ const float scale = gC_angle_scale[angle];
float fT = fX * cos_theta - fY * sin_theta + TOffset;
@@ -145,7 +145,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
float fTy = fT;
fT += fSubStep * cos_theta;
for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
- fVal += tex2D(gT_projTexture, fTy, fA);
+ fVal += tex2D(gT_projTexture, fTy, fA) * scale;
fTy -= fSubStep * sin_theta;
}
}
@@ -172,6 +172,8 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
const float fT = fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
+ // NB: The 'scale' constant in devBP is cancelled out by the SART weighting
+
D_volData[Y*volPitch+X] += fVal * fOutputScale;
}
@@ -186,27 +188,34 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* angle_scaled_sin = new float[dims.iProjAngles];
float* angle_scaled_cos = new float[dims.iProjAngles];
float* angle_offset = new float[dims.iProjAngles];
+ float* angle_scale = new float[dims.iProjAngles];
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;
angle_scaled_cos[i] = angles[i].fRayY / d;
- angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs
+ angle_scaled_sin[i] = -angles[i].fRayX / d;
angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;
+ angle_scale[i] = sqrt(angles[i].fRayX * angles[i].fRayX + angles[i].fRayY * angles[i].fRayY) / abs(d);
}
+ //fprintf(stderr, "outputscale in BP_internal: %f, %f\n", fOutputScale, angle_scale[0]);
+ //fprintf(stderr, "ray in BP_internal: %f,%f (length %f)\n", angles[0].fRayX, angles[0].fRayY, sqrt(angles[0].fRayX * angles[0].fRayX + angles[0].fRayY * angles[0].fRayY));
cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+ cudaError_t e4 = cudaMemcpyToSymbol(gC_angle_scale, angle_scale, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
assert(e1 == cudaSuccess);
assert(e2 == cudaSuccess);
assert(e3 == cudaSuccess);
+ assert(e4 == cudaSuccess);
delete[] angle_scaled_sin;
delete[] angle_scaled_cos;
delete[] angle_offset;
+ delete[] angle_scale;
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -267,6 +276,8 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float angle_scaled_cos = angles[angle].fRayY / d;
float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs
float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;
+ // NB: The adjoint scaling factor from regular BP is cancelled out by the SART weighting
+ //fOutputScale *= sqrt(angles[angle].fRayX * angles[angle].fRayX + angles[angle].fRayY * angles[angle].fRayY) / abs(d);
dim3 dimBlock(g_blockSlices, g_blockSliceSize);
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -282,55 +293,3 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_projData;
-
- SDimensions dims;
- dims.iVolWidth = 1024;
- dims.iVolHeight = 1024;
- dims.iProjAngles = 512;
- dims.iProjDets = 1536;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
-
- unsigned int volumePitch, projPitch;
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
- printf("pitch: %u\n", projPitch);
-
- unsigned int y, x;
- float* sino = loadImage("sino.png", y, x);
-
- float* img = new float[dims.iVolWidth*dims.iVolHeight];
-
- memset(img, 0, dims.iVolWidth*dims.iVolHeight*sizeof(float));
-
- copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
-
- float* angle = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i)
- angle[i] = i*(M_PI/dims.iProjAngles);
-
- BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);
-
- delete[] angle;
-
- copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
-
- saveImage("vol.png",dims.iVolHeight,dims.iVolWidth,img);
-
- return 0;
-}
-#endif
diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu
index 0835301..ea436c3 100644
--- a/cuda/2d/par_fp.cu
+++ b/cuda/2d/par_fp.cu
@@ -28,10 +28,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
#include <iostream>
@@ -115,10 +111,9 @@ __global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, u
float fSliceStep = cos_theta / sin_theta;
float fDistCorr;
if (sin_theta > 0.0f)
- fDistCorr = -fDetStep;
+ fDistCorr = outputScale / sin_theta;
else
- fDistCorr = fDetStep;
- fDistCorr *= outputScale;
+ fDistCorr = -outputScale / sin_theta;
float fVal = 0.0f;
// project detector on slice
@@ -193,10 +188,9 @@ __global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, uns
float fSliceStep = sin_theta / cos_theta;
float fDistCorr;
if (cos_theta < 0.0f)
- fDistCorr = -fDetStep;
+ fDistCorr = -outputScale / cos_theta;
else
- fDistCorr = fDetStep;
- fDistCorr *= outputScale;
+ fDistCorr = outputScale / cos_theta;
float fVal = 0.0f;
float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolHeight + 0.5f) * fSliceStep + 0.5f*dims.iVolWidth - 0.5f + 0.5f;
@@ -375,65 +369,3 @@ bool FP(float* D_volumeData, unsigned int volumePitch,
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_projData;
-
- SDimensions dims;
- dims.iVolWidth = 1024;
- dims.iVolHeight = 1024;
- dims.iProjAngles = 512;
- dims.iProjDets = 1536;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
- unsigned int volumePitch, projPitch;
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch);
- printf("pitch: %u\n", projPitch);
-
- unsigned int y, x;
- float* img = loadImage("phantom.png", y, x);
-
- float* sino = new float[dims.iProjAngles * dims.iProjDets];
-
- memset(sino, 0, dims.iProjAngles * dims.iProjDets * sizeof(float));
-
- copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
-
- float* angle = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i)
- angle[i] = i*(M_PI/dims.iProjAngles);
-
- FP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);
-
- delete[] angle;
-
- copySinogramFromDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
-
- float s = 0.0f;
- for (unsigned int y = 0; y < dims.iProjAngles; ++y)
- for (unsigned int x = 0; x < dims.iProjDets; ++x)
- s += sino[y*dims.iProjDets+x] * sino[y*dims.iProjDets+x];
- printf("cpu norm: %f\n", s);
-
- //zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
- s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles, 1, 0);
- printf("gpu norm: %f\n", s);
-
- saveImage("sino.png",dims.iProjAngles,dims.iProjDets,sino);
-
-
- return 0;
-}
-#endif
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 64973ba..12ad6df 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -254,11 +254,11 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
if (parProjs) {
assert(!fanProjs);
return FP(D_volumeData, volumePitch, D_projData, projPitch,
- d, &parProjs[angle], outputScale);
+ d, &parProjs[angle], outputScale * fProjectorScale);
} else {
assert(fanProjs);
return FanFP(D_volumeData, volumePitch, D_projData, projPitch,
- d, &fanProjs[angle], outputScale);
+ d, &fanProjs[angle], outputScale * fProjectorScale);
}
}
@@ -266,6 +266,7 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, float outputScale)
{
+ // NB: No fProjectorScale here, as that it is cancelled out in the SART weighting
if (parProjs) {
assert(!fanProjs);
return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index 2621490..2c5fdc9 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -29,10 +29,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "astra/cuda/2d/util.h"
#include "astra/cuda/2d/arith.h"
-#ifdef STANDALONE
-#include "testutil.h"
-#endif
-
#include <cstdio>
#include <cassert>
@@ -302,62 +298,3 @@ float SIRT::computeDiffNorm()
}
-
-#ifdef STANDALONE
-
-using namespace astraCUDA;
-
-int main()
-{
- float* D_volumeData;
- float* D_sinoData;
-
- SDimensions dims;
- dims.iVolWidth = 1024;
- dims.iVolHeight = 1024;
- dims.iProjAngles = 512;
- dims.iProjDets = 1536;
- dims.fDetScale = 1.0f;
- dims.iRaysPerDet = 1;
- unsigned int volumePitch, sinoPitch;
-
- allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch);
- zeroVolume(D_volumeData, volumePitch, dims.iVolWidth, dims.iVolHeight);
- printf("pitch: %u\n", volumePitch);
-
- allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch);
- zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles);
- printf("pitch: %u\n", sinoPitch);
-
- unsigned int y, x;
- float* sino = loadImage("sino.png", y, x);
-
- float* img = new float[dims.iVolWidth*dims.iVolHeight];
-
- copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_sinoData, sinoPitch);
-
- float* angle = new float[dims.iProjAngles];
-
- for (unsigned int i = 0; i < dims.iProjAngles; ++i)
- angle[i] = i*(M_PI/dims.iProjAngles);
-
- SIRT sirt;
-
- sirt.setGeometry(dims, angle);
- sirt.init();
-
- sirt.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
-
- sirt.iterate(25);
-
-
- delete[] angle;
-
- copyVolumeFromDevice(img, dims.iVolWidth, dims, D_volumeData, volumePitch);
-
- saveImage("vol.png",dims.iVolHeight,dims.iVolWidth,img);
-
- return 0;
-}
-#endif
-