summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2015-12-04 16:35:00 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2015-12-04 16:35:00 +0100
commitf3ac1849f2b141ea1845752e1ca2317845e90e3a (patch)
treeadcdc9013af5cca4165857408ed7181ffdf0de64 /cuda
parent8144bf0397ee1913b830d82058ccd40df741f1b3 (diff)
parent0d015b1c91581ee5ef3e936f03e4c62fbc7ea362 (diff)
downloadastra-f3ac1849f2b141ea1845752e1ca2317845e90e3a.tar.gz
astra-f3ac1849f2b141ea1845752e1ca2317845e90e3a.tar.bz2
astra-f3ac1849f2b141ea1845752e1ca2317845e90e3a.tar.xz
astra-f3ac1849f2b141ea1845752e1ca2317845e90e3a.zip
Merge branch 'master'
Diffstat (limited to 'cuda')
-rw-r--r--cuda/2d/algo.cu7
-rw-r--r--cuda/2d/algo.h3
-rw-r--r--cuda/2d/astra.cu12
-rw-r--r--cuda/2d/cgls.cu4
-rw-r--r--cuda/2d/em.cu4
-rw-r--r--cuda/2d/fan_bp.cu47
-rw-r--r--cuda/2d/fan_bp.h9
-rw-r--r--cuda/2d/par_bp.cu31
-rw-r--r--cuda/2d/par_bp.h4
-rw-r--r--cuda/2d/sart.cu10
-rw-r--r--cuda/2d/sart.h2
-rw-r--r--cuda/2d/sirt.cu6
-rw-r--r--cuda/3d/algo3d.cu18
-rw-r--r--cuda/3d/algo3d.h9
-rw-r--r--cuda/3d/astra3d.cu1005
-rw-r--r--cuda/3d/astra3d.h215
-rw-r--r--cuda/3d/cgls3d.cu6
-rw-r--r--cuda/3d/cone_bp.cu26
-rw-r--r--cuda/3d/cone_bp.h7
-rw-r--r--cuda/3d/mem3d.cu270
-rw-r--r--cuda/3d/mem3d.h99
-rw-r--r--cuda/3d/par3d_bp.cu25
-rw-r--r--cuda/3d/par3d_bp.h6
-rw-r--r--cuda/3d/sirt3d.cu8
24 files changed, 856 insertions, 977 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 144fabd..dc74e51 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
}
bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
- float* D_projData, unsigned int projPitch)
+ float* D_projData, unsigned int projPitch,
+ float outputScale)
{
if (angles) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, angles, TOffsets);
+ dims, angles, TOffsets, outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs);
+ dims, fanProjs, outputScale);
}
}
diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h
index a75905e..99959c8 100644
--- a/cuda/2d/algo.h
+++ b/cuda/2d/algo.h
@@ -118,7 +118,8 @@ protected:
float* D_projData, unsigned int projPitch,
float outputScale);
bool callBP(float* D_volumeData, unsigned int volumePitch,
- float* D_projData, unsigned int projPitch);
+ float* D_projData, unsigned int projPitch,
+ float outputScale);
SDimensions dims;
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 2f72db0..4c69628 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -368,21 +368,19 @@ bool AstraFBP::run()
}
+ float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles;
+
if (pData->bFanBeam) {
- ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections);
+ ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);
} else {
- ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets);
+ ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);
}
if(!ok)
{
return false;
}
- processVol<opMul>(pData->D_volumeData,
- (M_PI / 2.0f) / (float)pData->dims.iProjAngles,
- pData->volumePitch, pData->dims);
-
return true;
}
@@ -594,7 +592,7 @@ bool BPalgo::iterate(unsigned int)
{
// TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant
zeroVolumeData(D_volumeData, volumePitch, dims);
- callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch);
+ callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f);
return true;
}
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index 9ead563..f402914 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations)
// p = A'*r
zeroVolumeData(D_p, pPitch, dims);
- callBP(D_p, pPitch, D_r, rPitch);
+ callBP(D_p, pPitch, D_r, rPitch, 1.0f);
if (useVolumeMask)
processVol<opMul>(D_p, D_maskData, pPitch, dims);
@@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations)
// z = A'*r
zeroVolumeData(D_z, zPitch, dims);
- callBP(D_z, zPitch, D_r, rPitch);
+ callBP(D_z, zPitch, D_r, rPitch, 1.0f);
if (useVolumeMask)
processVol<opMul>(D_z, D_maskData, zPitch, dims);
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index 00127c0..8593b08 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -102,7 +102,7 @@ bool EM::precomputeWeights()
#endif
{
processSino<opSet>(D_projData, 1.0f, projPitch, dims);
- callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+ callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
}
processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
@@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations)
// Do BP of projData into tmpData
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+ callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
// Multiply volumeData with tmpData divided by pixel weights
processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims);
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index 74e8b12..b4321ba 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float* volData = (float*)D_volData;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
float fVal = 0.0f;
float fA = startAngle + 0.5f;
@@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// BP specifically for SART.
// It includes (free) weighting with voxel weight.
// It assumes the proj texture is set up _without_ padding, unlike regular BP.
-__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims)
+__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
const float fT = fNum / fDen;
const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// Weighted BP for use in fan beam FBP
// Each pixel/ray is weighted by 1/L^2 where L is the distance to the source.
-__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
else
- devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
cudaStreamCreate(&stream);
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
// only one angle
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
@@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims);
+ devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
}
@@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
@@ -498,7 +505,7 @@ int main()
copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
- FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs);
+ FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h
index e4e69b0..3ebe1e8 100644
--- a/cuda/2d/fan_bp.h
+++ b/cuda/2d/fan_bp.h
@@ -33,16 +33,19 @@ namespace astraCUDA {
_AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
_AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
_AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
}
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 635200f..d9f7325 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
float fA = startAngle + 0.5f;
const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
if (offsets) {
for (int angle = startAngle; angle < endAngle; ++angle)
@@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
-__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims)
+__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
- D_volData[Y*volPitch+X] += fVal;
+ D_volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets)
+ const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
{
// TODO: process angles block by block
assert(dims.iProjAngles <= g_MaxAngles);
@@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+ devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
else
- devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+ devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets)
+ const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
ret = BP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
subdims, angles + iAngle,
- TOffsets ? TOffsets + iAngle : 0);
+ TOffsets ? TOffsets + iAngle : 0,
+ fOutputScale);
if (!ret)
return false;
}
@@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets)
+ const float* angles, const float* TOffsets, float fOutputScale)
{
// Only one angle.
// We need to Clamp to the border pixels instead of to zero, because
@@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims);
+ devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -369,7 +372,7 @@ int main()
for (unsigned int i = 0; i < dims.iProjAngles; ++i)
angle[i] = i*(M_PI/dims.iProjAngles);
- BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0);
+ BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);
delete[] angle;
diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h
index eaeafd8..64bcd34 100644
--- a/cuda/2d/par_bp.h
+++ b/cuda/2d/par_bp.h
@@ -36,12 +36,12 @@ namespace astraCUDA {
_AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
const SDimensions& dims, const float* angles,
- const float* TOffsets);
+ const float* TOffsets, float fOutputScale);
_AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets);
+ const float* angles, const float* TOffsets, float fOutputScale);
}
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 29670c3..e5cb5bb 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -200,10 +200,10 @@ bool SART::iterate(unsigned int iterations)
// BP, mask, and add back
// TODO: Try putting the masking directly in the BP
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
+ callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f);
processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);
} else {
- callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
+ callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f);
}
if (useMinConstraint)
@@ -264,16 +264,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- unsigned int angle)
+ unsigned int angle, float outputScale)
{
if (angles) {
assert(!fanProjs);
return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, angles, TOffsets);
+ angle, dims, angles, TOffsets, outputScale);
} else {
assert(fanProjs);
return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, fanProjs);
+ angle, dims, fanProjs, outputScale);
}
}
diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h
index 6574a6f..7dcd641 100644
--- a/cuda/2d/sart.h
+++ b/cuda/2d/sart.h
@@ -59,7 +59,7 @@ protected:
unsigned int angle, float outputScale);
bool callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- unsigned int angle);
+ unsigned int angle, float outputScale);
// projection angle variables
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index a6194a5..162ee77 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -127,10 +127,10 @@ bool SIRT::precomputeWeights()
zeroVolumeData(D_pixelWeight, pixelPitch, dims);
if (useSinogramMask) {
- callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
+ callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f);
} else {
processSino<opSet>(D_projData, 1.0f, projPitch, dims);
- callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+ callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
}
processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
@@ -251,7 +251,7 @@ bool SIRT::iterate(unsigned int iterations)
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+ callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);
diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu
index 7f61280..cc86b70 100644
--- a/cuda/3d/algo3d.cu
+++ b/cuda/3d/algo3d.cu
@@ -41,6 +41,7 @@ ReconAlgo3D::ReconAlgo3D()
coneProjs = 0;
par3DProjs = 0;
shouldAbort = false;
+ fOutputScale = 1.0f;
}
ReconAlgo3D::~ReconAlgo3D()
@@ -57,9 +58,10 @@ void ReconAlgo3D::reset()
shouldAbort = false;
}
-bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles)
+bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale)
{
dims = _dims;
+ fOutputScale = _outputScale;
coneProjs = new SConeProjection[dims.iProjAngles];
par3DProjs = 0;
@@ -69,9 +71,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject
return true;
}
-bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles)
+bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale)
{
dims = _dims;
+ fOutputScale = _outputScale;
par3DProjs = new SPar3DProjection[dims.iProjAngles];
coneProjs = 0;
@@ -87,19 +90,20 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,
float outputScale)
{
if (coneProjs) {
- return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale);
+ return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
} else {
- return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale);
+ return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
}
}
bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,
- cudaPitchedPtr& D_projData)
+ cudaPitchedPtr& D_projData,
+ float outputScale)
{
if (coneProjs) {
- return ConeBP(D_volumeData, D_projData, dims, coneProjs);
+ return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
} else {
- return Par3DBP(D_volumeData, D_projData, dims, par3DProjs);
+ return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
}
}
diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h
index f4c6a87..886b092 100644
--- a/cuda/3d/algo3d.h
+++ b/cuda/3d/algo3d.h
@@ -39,8 +39,8 @@ public:
ReconAlgo3D();
~ReconAlgo3D();
- bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs);
- bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs);
+ bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale);
+ bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale);
void signalAbort() { shouldAbort = true; }
@@ -51,12 +51,15 @@ protected:
cudaPitchedPtr& D_projData,
float outputScale);
bool callBP(cudaPitchedPtr& D_volumeData,
- cudaPitchedPtr& D_projData);
+ cudaPitchedPtr& D_projData,
+ float outputScale);
SDimensions3D dims;
SConeProjection* coneProjs;
SPar3DProjection* par3DProjs;
+ float fOutputScale;
+
volatile bool shouldAbort;
};
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 0b9c70b..8328229 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -40,6 +40,12 @@ $Id$
#include "arith3d.h"
#include "astra3d.h"
+#include "astra/ParallelProjectionGeometry3D.h"
+#include "astra/ParallelVecProjectionGeometry3D.h"
+#include "astra/ConeProjectionGeometry3D.h"
+#include "astra/ConeVecProjectionGeometry3D.h"
+#include "astra/VolumeGeometry3D.h"
+
#include <iostream>
using namespace astraCUDA3d;
@@ -52,86 +58,200 @@ enum CUDAProjectionType3d {
};
-static SConeProjection* genConeProjections(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- double fOriginSourceDistance,
- double fOriginDetectorDistance,
- double fDetUSize,
- double fDetVSize,
- const float *pfAngles)
-{
- SConeProjection base;
- base.fSrcX = 0.0f;
- base.fSrcY = -fOriginSourceDistance;
- base.fSrcZ = 0.0f;
- base.fDetSX = iProjU * fDetUSize * -0.5f;
- base.fDetSY = fOriginDetectorDistance;
- base.fDetSZ = iProjV * fDetVSize * -0.5f;
- base.fDetUX = fDetUSize;
- base.fDetUY = 0.0f;
- base.fDetUZ = 0.0f;
- base.fDetVX = 0.0f;
- base.fDetVY = 0.0f;
- base.fDetVZ = fDetVSize;
- SConeProjection* p = new SConeProjection[iProjAngles];
+// adjust pProjs to normalize volume geometry
+template<typename ProjectionT>
+static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
+ unsigned int iProjectionAngleCount,
+ ProjectionT*& pProjs,
+ float& fOutputScale)
+{
+ assert(pVolGeom);
+ assert(pProjs);
+
+ // TODO: Relative instead of absolute
+ const float EPS = 0.00001f;
+ if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)
+ return false;
+ if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS)
+ return false;
-#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0)
- for (unsigned int i = 0; i < iProjAngles; ++i) {
- ROTATE0(Src, i, pfAngles[i]);
- ROTATE0(DetS, i, pfAngles[i]);
- ROTATE0(DetU, i, pfAngles[i]);
- ROTATE0(DetV, i, pfAngles[i]);
+ // Translate
+ float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
+ float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
+ float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2;
+
+ float factor = 1.0f / pVolGeom->getPixelLengthX();
+
+ for (int i = 0; i < iProjectionAngleCount; ++i) {
+ // CHECKME: Order of scaling and translation
+ pProjs[i].translate(dx, dy, dz);
+ pProjs[i].scale(factor);
}
-#undef ROTATE0
+ // CHECKME: Check factor
+ fOutputScale *= pVolGeom->getPixelLengthX();
+
+ return true;
+}
+
+
+bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
+ SDimensions3D& dims)
+{
+ dims.iVolX = pVolGeom->getGridColCount();
+ dims.iVolY = pVolGeom->getGridRowCount();
+ dims.iVolZ = pVolGeom->getGridSliceCount();
+ dims.iProjAngles = pProjGeom->getProjectionCount();
+ dims.iProjU = pProjGeom->getDetectorColCount(),
+ dims.iProjV = pProjGeom->getDetectorRowCount(),
+ dims.iRaysPerDetDim = 1;
+ dims.iRaysPerVoxelDim = 1;
+
+ if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0)
+ return false;
+ if (dims.iProjAngles <= 0 || dims.iProjU <= 0 || dims.iProjV <= 0)
+ return false;
- return p;
+ return true;
}
-static SPar3DProjection* genPar3DProjections(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- double fDetUSize,
- double fDetVSize,
- const float *pfAngles)
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CParallelProjectionGeometry3D* pProjGeom,
+ SPar3DProjection*& pProjs, float& fOutputScale)
{
- SPar3DProjection base;
- base.fRayX = 0.0f;
- base.fRayY = 1.0f;
- base.fRayZ = 0.0f;
+ assert(pVolGeom);
+ assert(pProjGeom);
+ assert(pProjGeom->getProjectionAngles());
- base.fDetSX = iProjU * fDetUSize * -0.5f;
- base.fDetSY = 0.0f;
- base.fDetSZ = iProjV * fDetVSize * -0.5f;
+ int nth = pProjGeom->getProjectionCount();
- base.fDetUX = fDetUSize;
- base.fDetUY = 0.0f;
- base.fDetUZ = 0.0f;
+ pProjs = genPar3DProjections(nth,
+ pProjGeom->getDetectorColCount(),
+ pProjGeom->getDetectorRowCount(),
+ pProjGeom->getDetectorSpacingX(),
+ pProjGeom->getDetectorSpacingY(),
+ pProjGeom->getProjectionAngles());
- base.fDetVX = 0.0f;
- base.fDetVY = 0.0f;
- base.fDetVZ = fDetVSize;
+ bool ok;
- SPar3DProjection* p = new SPar3DProjection[iProjAngles];
+ fOutputScale = 1.0f;
-#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0)
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
- for (unsigned int i = 0; i < iProjAngles; ++i) {
- ROTATE0(Ray, i, pfAngles[i]);
- ROTATE0(DetS, i, pfAngles[i]);
- ROTATE0(DetU, i, pfAngles[i]);
- ROTATE0(DetV, i, pfAngles[i]);
- }
+ return ok;
+}
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CParallelVecProjectionGeometry3D* pProjGeom,
+ SPar3DProjection*& pProjs, float& fOutputScale)
+{
+ assert(pVolGeom);
+ assert(pProjGeom);
+ assert(pProjGeom->getProjectionVectors());
+
+ int nth = pProjGeom->getProjectionCount();
+
+ pProjs = new SPar3DProjection[nth];
+ for (int i = 0; i < nth; ++i)
+ pProjs[i] = pProjGeom->getProjectionVectors()[i];
+
+ bool ok;
+
+ fOutputScale = 1.0f;
+
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+ return ok;
+}
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CConeProjectionGeometry3D* pProjGeom,
+ SConeProjection*& pProjs, float& fOutputScale)
+{
+ assert(pVolGeom);
+ assert(pProjGeom);
+ assert(pProjGeom->getProjectionAngles());
+
+ int nth = pProjGeom->getProjectionCount();
+
+ pProjs = genConeProjections(nth,
+ pProjGeom->getDetectorColCount(),
+ pProjGeom->getDetectorRowCount(),
+ pProjGeom->getOriginSourceDistance(),
+ pProjGeom->getOriginDetectorDistance(),
+ pProjGeom->getDetectorSpacingX(),
+ pProjGeom->getDetectorSpacingY(),
+ pProjGeom->getProjectionAngles());
+
+ bool ok;
+
+ fOutputScale = 1.0f;
+
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+ return ok;
+}
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CConeVecProjectionGeometry3D* pProjGeom,
+ SConeProjection*& pProjs, float& fOutputScale)
+{
+ assert(pVolGeom);
+ assert(pProjGeom);
+ assert(pProjGeom->getProjectionVectors());
+
+ int nth = pProjGeom->getProjectionCount();
+
+ pProjs = new SConeProjection[nth];
+ for (int i = 0; i < nth; ++i)
+ pProjs[i] = pProjGeom->getProjectionVectors()[i];
+
+ bool ok;
+
+ fOutputScale = 1.0f;
+
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+
+ return ok;
+}
+
+
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
+ SPar3DProjection*& pParProjs,
+ SConeProjection*& pConeProjs,
+ float& fOutputScale)
+{
+ const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+ const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+ const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+ const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
+
+ pConeProjs = 0;
+ pParProjs = 0;
-#undef ROTATE0
+ bool ok;
- return p;
+ if (conegeom) {
+ ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale);
+ } else if (conevec3dgeom) {
+ ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale);
+ } else if (par3dgeom) {
+ ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale);
+ } else if (parvec3dgeom) {
+ ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale);
+ } else {
+ ok = false;
+ }
+
+ return ok;
}
@@ -151,7 +271,7 @@ public:
SConeProjection* projs;
SPar3DProjection* parprojs;
- float fPixelSize;
+ float fOutputScale;
bool initialized;
bool setStartReconstruction;
@@ -188,6 +308,8 @@ AstraSIRT3d::AstraSIRT3d()
pData->dims.iRaysPerVoxelDim = 1;
pData->projs = 0;
+ pData->parprojs = 0;
+ pData->fOutputScale = 1.0f;
pData->initialized = false;
pData->setStartReconstruction = false;
@@ -220,127 +342,37 @@ AstraSIRT3d::~AstraSIRT3d()
pData = 0;
}
-bool AstraSIRT3d::setReconstructionGeometry(unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ/*,
- float fPixelSize = 1.0f*/)
-{
- if (pData->initialized)
- return false;
-
- pData->dims.iVolX = iVolX;
- pData->dims.iVolY = iVolY;
- pData->dims.iVolZ = iVolZ;
-
- return (iVolX > 0 && iVolY > 0 && iVolZ > 0);
-}
-
-
-bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection* projs)
-{
- if (pData->initialized)
- return false;
-
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
- return false;
-
- pData->parprojs = new SPar3DProjection[iProjAngles];
- memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0]));
-
- pData->projType = PROJ_PARALLEL;
-
- return true;
-}
-
-bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles)
+bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom)
{
if (pData->initialized)
return false;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SPar3DProjection* p = genPar3DProjections(iProjAngles,
- iProjU, iProjV,
- fDetUSize, fDetVSize,
- pfAngles);
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
-
- pData->parprojs = p;
- pData->projType = PROJ_PARALLEL;
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
- return true;
-}
-
-
-
-bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection* projs)
-{
- if (pData->initialized)
+ if (!ok)
return false;
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
+ pData->projs = 0;
+ pData->parprojs = 0;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pData->parprojs, pData->projs,
+ pData->fOutputScale);
+ if (!ok)
return false;
- pData->projs = new SConeProjection[iProjAngles];
- memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0]));
-
- pData->projType = PROJ_CONE;
+ if (pData->projs) {
+ assert(pData->parprojs == 0);
+ pData->projType = PROJ_CONE;
+ } else {
+ assert(pData->parprojs != 0);
+ pData->projType = PROJ_PARALLEL;
+ }
return true;
}
-bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles)
-{
- if (pData->initialized)
- return false;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SConeProjection* p = genConeProjections(iProjAngles,
- iProjU, iProjV,
- fOriginSourceDistance,
- fOriginDetectorDistance,
- fDetUSize, fDetVSize,
- pfAngles);
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
-
- pData->projs = p;
- pData->projType = PROJ_CONE;
-
- return true;
-}
bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
unsigned int iDetectorSuperSampling)
@@ -404,9 +436,9 @@ bool AstraSIRT3d::init()
bool ok;
if (pData->projType == PROJ_PARALLEL) {
- ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs);
+ ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
} else {
- ok = pData->sirt.setConeGeometry(pData->dims, pData->projs);
+ ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
}
if (!ok)
@@ -618,7 +650,7 @@ public:
SConeProjection* projs;
SPar3DProjection* parprojs;
- float fPixelSize;
+ float fOutputScale;
bool initialized;
bool setStartReconstruction;
@@ -655,6 +687,8 @@ AstraCGLS3d::AstraCGLS3d()
pData->dims.iRaysPerVoxelDim = 1;
pData->projs = 0;
+ pData->parprojs = 0;
+ pData->fOutputScale = 1.0f;
pData->initialized = false;
pData->setStartReconstruction = false;
@@ -687,125 +721,33 @@ AstraCGLS3d::~AstraCGLS3d()
pData = 0;
}
-bool AstraCGLS3d::setReconstructionGeometry(unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ/*,
- float fPixelSize = 1.0f*/)
-{
- if (pData->initialized)
- return false;
-
- pData->dims.iVolX = iVolX;
- pData->dims.iVolY = iVolY;
- pData->dims.iVolZ = iVolZ;
-
- return (iVolX > 0 && iVolY > 0 && iVolZ > 0);
-}
-
-
-bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection* projs)
-{
- if (pData->initialized)
- return false;
-
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
- return false;
-
- pData->parprojs = new SPar3DProjection[iProjAngles];
- memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0]));
-
- pData->projType = PROJ_PARALLEL;
-
- return true;
-}
-
-bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles)
-{
- if (pData->initialized)
- return false;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SPar3DProjection* p = genPar3DProjections(iProjAngles,
- iProjU, iProjV,
- fDetUSize, fDetVSize,
- pfAngles);
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
-
- pData->parprojs = p;
- pData->projType = PROJ_PARALLEL;
-
- return true;
-}
-
-
-
-bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection* projs)
+bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom)
{
if (pData->initialized)
return false;
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims);
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0)
+ if (!ok)
return false;
- pData->projs = new SConeProjection[iProjAngles];
- memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0]));
-
- pData->projType = PROJ_CONE;
-
- return true;
-}
-
-bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles)
-{
- if (pData->initialized)
- return false;
+ pData->projs = 0;
+ pData->parprojs = 0;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pData->parprojs, pData->projs,
+ pData->fOutputScale);
+ if (!ok)
return false;
- SConeProjection* p = genConeProjections(iProjAngles,
- iProjU, iProjV,
- fOriginSourceDistance,
- fOriginDetectorDistance,
- fDetUSize, fDetVSize,
- pfAngles);
-
- pData->dims.iProjAngles = iProjAngles;
- pData->dims.iProjU = iProjU;
- pData->dims.iProjV = iProjV;
-
- pData->projs = p;
- pData->projType = PROJ_CONE;
+ if (pData->projs) {
+ assert(pData->parprojs == 0);
+ pData->projType = PROJ_CONE;
+ } else {
+ assert(pData->parprojs != 0);
+ pData->projType = PROJ_PARALLEL;
+ }
return true;
}
@@ -874,9 +816,9 @@ bool AstraCGLS3d::init()
bool ok;
if (pData->projType == PROJ_PARALLEL) {
- ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs);
+ ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
} else {
- ok = pData->cgls.setConeGeometry(pData->dims, pData->projs);
+ ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
}
if (!ok)
@@ -1077,179 +1019,31 @@ float AstraCGLS3d::computeDiffNorm()
-bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling)
-{
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SConeProjection* p = genConeProjections(iProjAngles,
- iProjU, iProjV,
- fOriginSourceDistance,
- fOriginDetectorDistance,
- fDetUSize, fDetVSize,
- pfAngles);
-
- bool ok;
- ok = astraCudaConeFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
- iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling);
-
- delete[] p;
-
- return ok;
-}
-
-bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling)
+bool astraCudaFP(const float* pfVolume, float* pfProjections,
+ const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
+ int iGPUIndex, int iDetectorSuperSampling,
+ Cuda3DProjectionKernel projKernel)
{
SDimensions3D dims;
- dims.iVolX = iVolX;
- dims.iVolY = iVolY;
- dims.iVolZ = iVolZ;
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
-
- dims.iProjAngles = iProjAngles;
- dims.iProjU = iProjU;
- dims.iProjV = iProjV;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+ if (!ok)
return false;
dims.iRaysPerDetDim = iDetectorSuperSampling;
-
if (iDetectorSuperSampling == 0)
return false;
- if (iGPUIndex != -1) {
- cudaSetDevice(iGPUIndex);
- cudaError_t err = cudaGetLastError();
-
- // Ignore errors caused by calling cudaSetDevice multiple times
- if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
- return false;
- }
-
- cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- bool ok = D_volumeData.ptr;
- if (!ok)
- return false;
-
- cudaPitchedPtr D_projData = allocateProjectionData(dims);
- ok = D_projData.ptr;
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- return false;
- }
-
- ok &= copyVolumeToDevice(pfVolume, D_volumeData, dims, dims.iVolX);
-
- ok &= zeroProjectionData(D_projData, dims);
-
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
- return false;
- }
-
- ok &= ConeFP(D_volumeData, D_projData, dims, pfAngles, 1.0f);
-
- ok &= copyProjectionsFromDevice(pfProjections, D_projData,
- dims, dims.iProjU);
-
-
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
-
- return ok;
-
-}
-
-bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling,
- Cuda3DProjectionKernel projKernel)
-{
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SPar3DProjection* p = genPar3DProjections(iProjAngles,
- iProjU, iProjV,
- fDetUSize, fDetVSize,
- pfAngles);
-
- bool ok;
- ok = astraCudaPar3DFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
- iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling,
- projKernel);
-
- delete[] p;
-
- return ok;
-}
-
-
-bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling,
- Cuda3DProjectionKernel projKernel)
-{
- SDimensions3D dims;
-
- dims.iVolX = iVolX;
- dims.iVolY = iVolY;
- dims.iVolZ = iVolZ;
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
- dims.iProjAngles = iProjAngles;
- dims.iProjU = iProjU;
- dims.iProjV = iProjV;
+ float outputScale;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ outputScale);
- dims.iRaysPerDetDim = iDetectorSuperSampling;
-
- if (iDetectorSuperSampling == 0)
- return false;
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1262,7 +1056,7 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- bool ok = D_volumeData.ptr;
+ ok = D_volumeData.ptr;
if (!ok)
return false;
@@ -1283,15 +1077,25 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
return false;
}
- switch (projKernel) {
- case ker3d_default:
- ok &= Par3DFP(D_volumeData, D_projData, dims, pfAngles, 1.0f);
- break;
- case ker3d_sum_square_weights:
- ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pfAngles, 1.0f);
- break;
- default:
- assert(false);
+ if (pParProjs) {
+ switch (projKernel) {
+ case ker3d_default:
+ ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ break;
+ case ker3d_sum_square_weights:
+ ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale);
+ break;
+ default:
+ assert(false);
+ }
+ } else {
+ switch (projKernel) {
+ case ker3d_default:
+ ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ break;
+ default:
+ assert(false);
+ }
}
ok &= copyProjectionsFromDevice(pfProjections, D_projData,
@@ -1305,207 +1109,28 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
}
-bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling)
-{
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SConeProjection* p = genConeProjections(iProjAngles,
- iProjU, iProjV,
- fOriginSourceDistance,
- fOriginDetectorDistance,
- fDetUSize, fDetVSize,
- pfAngles);
-
- bool ok;
- ok = astraCudaConeBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
- iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
- delete[] p;
-
- return ok;
-}
-
-bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling)
+bool astraCudaBP(float* pfVolume, const float* pfProjections,
+ const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
+ int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
- dims.iVolX = iVolX;
- dims.iVolY = iVolY;
- dims.iVolZ = iVolZ;
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
-
- dims.iProjAngles = iProjAngles;
- dims.iProjU = iProjU;
- dims.iProjV = iProjV;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- if (iGPUIndex != -1) {
- cudaSetDevice(iGPUIndex);
- cudaError_t err = cudaGetLastError();
-
- // Ignore errors caused by calling cudaSetDevice multiple times
- if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
- return false;
- }
-
- cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- bool ok = D_volumeData.ptr;
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- cudaPitchedPtr D_projData = allocateProjectionData(dims);
- ok = D_projData.ptr;
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- return false;
- }
-
- ok &= copyProjectionsToDevice(pfProjections, D_projData,
- dims, dims.iProjU);
-
- ok &= zeroVolumeData(D_volumeData, dims);
-
- if (!ok) {
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
- return false;
- }
-
- ok &= ConeBP(D_volumeData, D_projData, dims, pfAngles);
-
- ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
-
-
- cudaFree(D_volumeData.ptr);
- cudaFree(D_projData.ptr);
-
- return ok;
-
-}
-
-bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling)
-{
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SPar3DProjection* p = genPar3DProjections(iProjAngles,
- iProjU, iProjV,
- fDetUSize, fDetVSize,
- pfAngles);
-
- bool ok;
- ok = astraCudaPar3DBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
- iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
-
- delete[] p;
-
- return ok;
-}
-
-// This computes the column weights, divides by them, and adds the
-// result to the current volume. This is both more expensive and more
-// GPU memory intensive than the regular BP, but allows saving system RAM.
-bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling)
-{
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
- SPar3DProjection* p = genPar3DProjections(iProjAngles,
- iProjU, iProjV,
- fDetUSize, fDetVSize,
- pfAngles);
-
- bool ok;
- ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
- iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
-
- delete[] p;
-
- return ok;
-}
-
-
-bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling)
-{
- SDimensions3D dims;
-
- dims.iVolX = iVolX;
- dims.iVolY = iVolY;
- dims.iVolZ = iVolZ;
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
+ dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- dims.iProjAngles = iProjAngles;
- dims.iProjU = iProjU;
- dims.iProjV = iProjV;
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
+ float outputScale;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ outputScale);
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1518,7 +1143,7 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- bool ok = D_volumeData.ptr;
+ ok = D_volumeData.ptr;
if (!ok)
return false;
@@ -1540,7 +1165,10 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
return false;
}
- ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles);
+ if (pParProjs)
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ else
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
@@ -1556,33 +1184,28 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
// This computes the column weights, divides by them, and adds the
// result to the current volume. This is both more expensive and more
// GPU memory intensive than the regular BP, but allows saving system RAM.
-bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
+bool astraCudaBP_SIRTWeighted(float* pfVolume,
const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection *pfAngles,
+ const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
- dims.iVolX = iVolX;
- dims.iVolY = iVolY;
- dims.iVolZ = iVolZ;
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+ if (!ok)
return false;
- dims.iProjAngles = iProjAngles;
- dims.iProjU = iProjU;
- dims.iProjV = iProjV;
+ dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+ float outputScale;
+
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ outputScale);
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1595,7 +1218,7 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims);
- bool ok = D_pixelWeight.ptr;
+ ok = D_pixelWeight.ptr;
if (!ok)
return false;
@@ -1617,7 +1240,12 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
// Compute weights
ok &= zeroVolumeData(D_pixelWeight, dims);
processSino3D<opSet>(D_projData, 1.0f, dims);
- ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles);
+
+ if (pParProjs)
+ ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale);
+ else
+ ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale);
+
processVol3D<opInvert>(D_pixelWeight, dims);
if (!ok) {
cudaFree(D_pixelWeight.ptr);
@@ -1630,7 +1258,11 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
dims, dims.iProjU);
ok &= zeroVolumeData(D_volumeData, dims);
// Do BP into D_volumeData
- ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles);
+ if (pParProjs)
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ else
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+
// Multiply with weights
processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
@@ -1653,6 +1285,9 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
cudaFree(D_volumeData.ptr);
cudaFree(D_projData.ptr);
+ delete[] pParProjs;
+ delete[] pConeProjs;
+
return ok;
}
@@ -1660,33 +1295,19 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
bool astraCudaFDK(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
+ const CVolumeGeometry3D* pVolGeom,
+ const CConeProjectionGeometry3D* pProjGeom,
bool bShortScan,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
- dims.iVolX = iVolX;
- dims.iVolY = iVolY;
- dims.iVolZ = iVolZ;
- if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
- return false;
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
- dims.iProjAngles = iProjAngles;
- dims.iProjU = iProjU;
- dims.iProjV = iProjV;
+ // TODO: Check that pVolGeom is normalized, since we don't support
+ // other volume geometries yet
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+ if (!ok)
return false;
dims.iRaysPerVoxelDim = iVoxelSuperSampling;
@@ -1703,9 +1324,8 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
return false;
}
-
cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
- bool ok = D_volumeData.ptr;
+ ok = D_volumeData.ptr;
if (!ok)
return false;
@@ -1726,6 +1346,13 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
return false;
}
+ float fOriginSourceDistance = pProjGeom->getOriginSourceDistance();
+ float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance();
+ float fDetUSize = pProjGeom->getDetectorSpacingX();
+ float fDetVSize = pProjGeom->getDetectorSpacingY();
+ const float *pfAngles = pProjGeom->getProjectionAngles();
+
+
// TODO: Offer interface for SrcZ, DetZ
ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance,
fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize,
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index f91fe26..2782994 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -42,7 +42,12 @@ enum Cuda3DProjectionKernel {
ker3d_sum_square_weights
};
-
+class CProjectionGeometry3D;
+class CParallelProjectionGeometry3D;
+class CParallelVecProjectionGeometry3D;
+class CConeProjectionGeometry3D;
+class CConeVecProjectionGeometry3D;
+class CVolumeGeometry3D;
class AstraSIRT3d_internal;
@@ -52,37 +57,9 @@ public:
AstraSIRT3d();
~AstraSIRT3d();
- // Set the number of pixels in the reconstruction rectangle,
- // and the length of the edge of a pixel.
- // Volume pixels are assumed to be square.
- // This must be called before setting the projection geometry.
- bool setReconstructionGeometry(unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ/*,
- float fPixelSize = 1.0f*/);
-
- bool setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection* projs);
- bool setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fSourceZ,
- float fDetSize,
- const float *pfAngles);
- bool setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection* projs);
- bool setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fSourceZ,
- float fDetSize,
- const float *pfAngles);
+ // Set the volume and projection geometry
+ bool setGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom);
// Enable supersampling.
//
@@ -197,37 +174,9 @@ public:
AstraCGLS3d();
~AstraCGLS3d();
- // Set the number of pixels in the reconstruction rectangle,
- // and the length of the edge of a pixel.
- // Volume pixels are assumed to be square.
- // This must be called before setting the projection geometry.
- bool setReconstructionGeometry(unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ/*,
- float fPixelSize = 1.0f*/);
-
- bool setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection* projs);
- bool setConeGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fSourceZ,
- float fDetSize,
- const float *pfAngles);
- bool setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection* projs);
- bool setPar3DGeometry(unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fSourceZ,
- float fDetSize,
- const float *pfAngles);
+ // Set the volume and projection geometry
+ bool setGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom);
// Enable supersampling.
//
@@ -332,140 +281,40 @@ protected:
AstraCGLS3d_internal *pData;
};
+bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
+ astraCUDA3d::SDimensions3D& dims);
+bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
+ SPar3DProjection*& pParProjs,
+ SConeProjection*& pConeProjs,
+ float& fOutputScale);
-_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling);
-
-_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling);
-
-_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
+_AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections,
+ const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
int iGPUIndex, int iDetectorSuperSampling,
Cuda3DProjectionKernel projKernel);
-_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection *pfAngles,
- int iGPUIndex, int iDetectorSuperSampling,
- Cuda3DProjectionKernel projKernel);
-
-_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SConeProjection *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
+_AstraExport bool astraCudaBP(float* pfVolume, const float* pfProjections,
+ const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
int iGPUIndex, int iVoxelSuperSampling);
-_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
- int iGPUIndex, int iVoxelSuperSampling);
-
-_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- const SPar3DProjection *pfAngles,
+_AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
+ const CVolumeGeometry3D* pVolGeom,
+ const CProjectionGeometry3D* pProjGeom,
int iGPUIndex, int iVoxelSuperSampling);
_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
- unsigned int iVolX,
- unsigned int iVolY,
- unsigned int iVolZ,
- unsigned int iProjAngles,
- unsigned int iProjU,
- unsigned int iProjV,
- float fOriginSourceDistance,
- float fOriginDetectorDistance,
- float fDetUSize,
- float fDetVSize,
- const float *pfAngles,
+ const CVolumeGeometry3D* pVolGeom,
+ const CConeProjectionGeometry3D* pProjGeom,
bool bShortScan,
int iGPUIndex, int iVoxelSuperSampling);
+
}
diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu
index 5071a9b..dd0e8a0 100644
--- a/cuda/3d/cgls3d.cu
+++ b/cuda/3d/cgls3d.cu
@@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations)
// p = A'*r
zeroVolumeData(D_p, dims);
- callBP(D_p, D_r);
+ callBP(D_p, D_r, 1.0f);
if (useVolumeMask)
processVol3D<opMul>(D_p, D_maskData, dims);
@@ -195,7 +195,7 @@ bool CGLS::iterate(unsigned int iterations)
// z = A'*r
zeroVolumeData(D_z, dims);
- callBP(D_z, D_r);
+ callBP(D_z, D_r, 1.0f);
if (useVolumeMask)
processVol3D<opMul>(D_z, D_maskData, dims);
@@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData,
CGLS cgls;
bool ok = true;
- ok &= cgls.setConeGeometry(dims, angles);
+ ok &= cgls.setConeGeometry(dims, angles, 1.0f);
if (D_maskData.ptr)
ok &= cgls.enableVolumeMask();
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 5648d6f..4a41f6a 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array)
//__launch_bounds__(32*16, 4)
__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,
- int angleOffset, const astraCUDA3d::SDimensions3D dims)
+ int angleOffset, const astraCUDA3d::SDimensions3D dims,
+ float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
endZ = dims.iVolZ - startZ;
for(int i=0; i < endZ; i++)
- volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i];
+ volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;
} //End kernel
// supersampling version
-__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+
+
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
{
@@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
}
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;
}
}
bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SConeProjection* angles)
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale)
{
bindProjDataTexture(D_projArray);
@@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
if (dims.iRaysPerVoxelDim == 1)
- dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SConeProjection* angles)
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles);
+ bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
cudaFreeArray(cuArray);
@@ -473,7 +479,7 @@ int main()
}
#endif
- astraCUDA3d::ConeBP(volData, projData, dims, angle);
+ astraCUDA3d::ConeBP(volData, projData, dims, angle, 1.0f);
#if 0
float* buf = new float[256*256];
diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h
index cba6d9f..4d3d2dd 100644
--- a/cuda/3d/cone_bp.h
+++ b/cuda/3d/cone_bp.h
@@ -33,13 +33,14 @@ namespace astraCUDA3d {
_AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SConeProjection* angles);
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale);
_AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SConeProjection* angles);
+ const SDimensions3D& dims, const SConeProjection* angles,
+ float fOutputScale);
-
}
#endif
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
new file mode 100644
index 0000000..6d81dc0
--- /dev/null
+++ b/cuda/3d/mem3d.cu
@@ -0,0 +1,270 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+ 2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include <cstdio>
+#include <cassert>
+
+#include "util3d.h"
+
+#include "mem3d.h"
+
+#include "astra3d.h"
+#include "cone_fp.h"
+#include "cone_bp.h"
+#include "par3d_fp.h"
+#include "par3d_bp.h"
+
+#include "astra/Logging.h"
+
+
+namespace astraCUDA3d {
+
+
+struct SMemHandle3D_internal
+{
+ cudaPitchedPtr ptr;
+ unsigned int nx;
+ unsigned int ny;
+ unsigned int nz;
+};
+
+size_t availableGPUMemory()
+{
+ size_t free, total;
+ cudaError_t err = cudaMemGetInfo(&free, &total);
+ if (err != cudaSuccess)
+ return 0;
+ return free;
+}
+
+MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero)
+{
+ SMemHandle3D_internal hnd;
+ hnd.nx = x;
+ hnd.ny = y;
+ hnd.nz = z;
+
+ size_t free = availableGPUMemory();
+
+ cudaError_t err;
+ err = cudaMalloc3D(&hnd.ptr, make_cudaExtent(sizeof(float)*x, y, z));
+
+ if (err != cudaSuccess) {
+ return MemHandle3D();
+ }
+
+ size_t free2 = availableGPUMemory();
+
+ ASTRA_DEBUG("Allocated %d x %d x %d on GPU. (Pre: %lu, post: %lu)", x, y, z, free, free2);
+
+
+
+ if (zero == INIT_ZERO) {
+ err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z));
+ if (err != cudaSuccess) {
+ cudaFree(hnd.ptr.ptr);
+ return MemHandle3D();
+ }
+ }
+
+ MemHandle3D ret;
+ ret.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal);
+ *ret.d = hnd;
+
+ return ret;
+}
+
+bool freeGPUMemory(MemHandle3D handle)
+{
+ size_t free = availableGPUMemory();
+ cudaError_t err = cudaFree(handle.d->ptr.ptr);
+ size_t free2 = availableGPUMemory();
+
+ ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2);
+
+ return err == cudaSuccess;
+}
+
+bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos)
+{
+ ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz);
+ ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+ cudaPitchedPtr s;
+ s.ptr = (void*)src; // const cast away
+ s.pitch = pos.pitch * sizeof(float);
+ s.xsize = pos.nx * sizeof(float);
+ s.ysize = pos.ny;
+ ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", s.pitch, s.xsize, s.ysize);
+
+ cudaMemcpy3DParms p;
+ p.srcArray = 0;
+ p.srcPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz);
+ p.srcPtr = s;
+
+ p.dstArray = 0;
+ p.dstPos = make_cudaPos(0, 0, 0);
+ p.dstPtr = dst.d->ptr;
+
+ p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+
+ p.kind = cudaMemcpyHostToDevice;
+
+ cudaError_t err = cudaMemcpy3D(&p);
+
+ return err == cudaSuccess;
+}
+
+
+bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
+{
+ ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz);
+ ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz);
+ cudaPitchedPtr d;
+ d.ptr = (void*)dst;
+ d.pitch = pos.pitch * sizeof(float);
+ d.xsize = pos.nx * sizeof(float);
+ d.ysize = pos.ny;
+ ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize);
+
+ cudaMemcpy3DParms p;
+ p.srcArray = 0;
+ p.srcPos = make_cudaPos(0, 0, 0);
+ p.srcPtr = src.d->ptr;
+
+ p.dstArray = 0;
+ p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz);
+ p.dstPtr = d;
+
+ p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz);
+
+ p.kind = cudaMemcpyDeviceToHost;
+
+ cudaError_t err = cudaMemcpy3D(&p);
+
+ return err == cudaSuccess;
+
+}
+
+
+bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)
+{
+ SDimensions3D dims;
+
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+ if (!ok)
+ return false;
+
+#if 1
+ dims.iRaysPerDetDim = iDetectorSuperSampling;
+ if (iDetectorSuperSampling == 0)
+ return false;
+#else
+ dims.iRaysPerDetDim = 1;
+ astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default;
+#endif
+
+
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
+
+ float outputScale = 1.0f;
+
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ outputScale);
+
+ if (pParProjs) {
+#if 0
+ for (int i = 0; i < dims.iProjAngles; ++i) {
+ ASTRA_DEBUG("Vec: %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f\n",
+ pParProjs[i].fRayX, pParProjs[i].fRayY, pParProjs[i].fRayZ,
+ pParProjs[i].fDetSX, pParProjs[i].fDetSY, pParProjs[i].fDetSZ,
+ pParProjs[i].fDetUX, pParProjs[i].fDetUY, pParProjs[i].fDetUZ,
+ pParProjs[i].fDetVX, pParProjs[i].fDetVY, pParProjs[i].fDetVZ);
+ }
+#endif
+
+ switch (projKernel) {
+ case astra::ker3d_default:
+ ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+ break;
+ case astra::ker3d_sum_square_weights:
+ ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale);
+ break;
+ default:
+ ok = false;
+ }
+ } else {
+ switch (projKernel) {
+ case astra::ker3d_default:
+ ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+ break;
+ default:
+ ok = false;
+ }
+ }
+
+ return ok;
+}
+
+bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling)
+{
+ SDimensions3D dims;
+
+ bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
+ if (!ok)
+ return false;
+
+#if 1
+ dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+#else
+ dims.iRaysPerVoxelDim = 1;
+#endif
+
+ SPar3DProjection* pParProjs;
+ SConeProjection* pConeProjs;
+
+ float outputScale = 1.0f;
+
+ ok = convertAstraGeometry(pVolGeom, pProjGeom,
+ pParProjs, pConeProjs,
+ outputScale);
+
+ if (pParProjs)
+ ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+ else
+ ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+
+ return ok;
+
+}
+
+
+
+
+}
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
new file mode 100644
index 0000000..82bad19
--- /dev/null
+++ b/cuda/3d/mem3d.h
@@ -0,0 +1,99 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+ 2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+*/
+
+#ifndef _CUDA_MEM3D_H
+#define _CUDA_MEM3D_H
+
+#include <boost/shared_ptr.hpp>
+
+#include "astra3d.h"
+
+namespace astra {
+class CVolumeGeometry3D;
+class CProjectionGeometry3D;
+}
+
+namespace astraCUDA3d {
+
+// TODO: Make it possible to delete these handles when they're no longer
+// necessary inside the FP/BP
+//
+// TODO: Add functions for querying capacity
+
+struct SMemHandle3D_internal;
+
+struct MemHandle3D {
+ boost::shared_ptr<SMemHandle3D_internal> d;
+ operator bool() const { return (bool)d; }
+};
+
+struct SSubDimensions3D {
+ unsigned int nx;
+ unsigned int ny;
+ unsigned int nz;
+ unsigned int pitch;
+ unsigned int subnx;
+ unsigned int subny;
+ unsigned int subnz;
+ unsigned int subx;
+ unsigned int suby;
+ unsigned int subz;
+};
+
+/*
+// Useful or not?
+enum Mem3DCopyMode {
+ MODE_SET,
+ MODE_ADD
+};
+*/
+
+enum Mem3DZeroMode {
+ INIT_NO,
+ INIT_ZERO
+};
+
+size_t availableGPUMemory();
+
+MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero);
+
+bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos);
+
+bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos);
+
+bool freeGPUMemory(MemHandle3D handle);
+
+
+bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel);
+
+bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling);
+
+
+
+}
+
+#endif
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index 0c33280..cafab46 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(const cudaArray* array)
}
-__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -139,11 +139,11 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn
endZ = dims.iVolZ - startZ;
for(int i=0; i < endZ; i++)
- volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i];
+ volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale;
}
// supersampling version
-__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims)
+__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -180,6 +180,9 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+
+
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
{
@@ -217,14 +220,15 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
}
- volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale;
}
}
bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SPar3DProjection* angles)
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale)
{
bindProjDataTexture(D_projArray);
@@ -271,9 +275,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
if (dims.iRaysPerVoxelDim == 1)
- dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims);
+ dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -288,14 +292,15 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SPar3DProjection* angles)
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles);
+ bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
cudaFreeArray(cuArray);
@@ -445,7 +450,7 @@ int main()
cudaMemcpy3D(&p);
}
- astraCUDA3d::Par3DBP(volData, projData, dims, angle);
+ astraCUDA3d::Par3DBP(volData, projData, dims, angle, 1.0f);
#if 1
float* buf = new float[256*256];
diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h
index ece37d1..f1fc62d 100644
--- a/cuda/3d/par3d_bp.h
+++ b/cuda/3d/par3d_bp.h
@@ -33,11 +33,13 @@ namespace astraCUDA3d {
_AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
- const SDimensions3D& dims, const SPar3DProjection* angles);
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale);
_AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
- const SDimensions3D& dims, const SPar3DProjection* angles);
+ const SDimensions3D& dims, const SPar3DProjection* angles,
+ float fOutputScale);
}
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 389ee6b..484521e 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -160,10 +160,10 @@ bool SIRT::precomputeWeights()
zeroVolumeData(D_pixelWeight, dims);
if (useSinogramMask) {
- callBP(D_pixelWeight, D_smaskData);
+ callBP(D_pixelWeight, D_smaskData, 1.0f);
} else {
processSino3D<opSet>(D_projData, 1.0f, dims);
- callBP(D_pixelWeight, D_projData);
+ callBP(D_pixelWeight, D_projData, 1.0f);
}
#if 0
float* bufp = new float[512*512];
@@ -293,7 +293,7 @@ bool SIRT::iterate(unsigned int iterations)
#endif
- callBP(D_tmpData, D_projData);
+ callBP(D_tmpData, D_projData, 1.0f);
#if 0
printf("Dumping tmpData: %p\n", (void*)D_tmpData.ptr);
float* buf = new float[256*256];
@@ -347,7 +347,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData,
SIRT sirt;
bool ok = true;
- ok &= sirt.setConeGeometry(dims, angles);
+ ok &= sirt.setConeGeometry(dims, angles, 1.0f);
if (D_maskData.ptr)
ok &= sirt.enableVolumeMask();