summaryrefslogtreecommitdiffstats
path: root/cuda/2d
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2015-03-12 14:28:03 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2015-03-12 14:28:03 +0100
commit9458268a8b9192af98fc1b88bf0a5fbbc7696a77 (patch)
treee0f4dc2bfa82ee9770958de4a39b586de0b5e4ef /cuda/2d
parent1a7dfb0964fa7686aa01f0d836f95910dc4dc07f (diff)
downloadastra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.tar.gz
astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.tar.bz2
astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.tar.xz
astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.zip
Add outputScale argument to 2D CUDA BP
Diffstat (limited to 'cuda/2d')
-rw-r--r--cuda/2d/algo.cu7
-rw-r--r--cuda/2d/algo.h3
-rw-r--r--cuda/2d/astra.cu12
-rw-r--r--cuda/2d/cgls.cu4
-rw-r--r--cuda/2d/em.cu4
-rw-r--r--cuda/2d/fan_bp.cu47
-rw-r--r--cuda/2d/fan_bp.h9
-rw-r--r--cuda/2d/par_bp.cu31
-rw-r--r--cuda/2d/par_bp.h4
-rw-r--r--cuda/2d/sart.cu10
-rw-r--r--cuda/2d/sart.h2
-rw-r--r--cuda/2d/sirt.cu6
12 files changed, 76 insertions, 63 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 144fabd..dc74e51 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
}
bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
- float* D_projData, unsigned int projPitch)
+ float* D_projData, unsigned int projPitch,
+ float outputScale)
{
if (angles) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, angles, TOffsets);
+ dims, angles, TOffsets, outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs);
+ dims, fanProjs, outputScale);
}
}
diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h
index a75905e..99959c8 100644
--- a/cuda/2d/algo.h
+++ b/cuda/2d/algo.h
@@ -118,7 +118,8 @@ protected:
float* D_projData, unsigned int projPitch,
float outputScale);
bool callBP(float* D_volumeData, unsigned int volumePitch,
- float* D_projData, unsigned int projPitch);
+ float* D_projData, unsigned int projPitch,
+ float outputScale);
SDimensions dims;
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 0b5be06..5e2a07a 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -367,21 +367,19 @@ bool AstraFBP::run()
}
+ float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles;
+
if (pData->bFanBeam) {
- ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections);
+ ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);
} else {
- ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets);
+ ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);
}
if(!ok)
{
return false;
}
- processVol<opMul>(pData->D_volumeData,
- (M_PI / 2.0f) / (float)pData->dims.iProjAngles,
- pData->volumePitch, pData->dims);
-
return true;
}
@@ -593,7 +591,7 @@ bool BPalgo::iterate(unsigned int)
{
// TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant
zeroVolumeData(D_volumeData, volumePitch, dims);
- callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch);
+ callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f);
return true;
}
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index 9ead563..f402914 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations)
// p = A'*r
zeroVolumeData(D_p, pPitch, dims);
- callBP(D_p, pPitch, D_r, rPitch);
+ callBP(D_p, pPitch, D_r, rPitch, 1.0f);
if (useVolumeMask)
processVol<opMul>(D_p, D_maskData, pPitch, dims);
@@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations)
// z = A'*r
zeroVolumeData(D_z, zPitch, dims);
- callBP(D_z, zPitch, D_r, rPitch);
+ callBP(D_z, zPitch, D_r, rPitch, 1.0f);
if (useVolumeMask)
processVol<opMul>(D_z, D_maskData, zPitch, dims);
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index 00127c0..8593b08 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -102,7 +102,7 @@ bool EM::precomputeWeights()
#endif
{
processSino<opSet>(D_projData, 1.0f, projPitch, dims);
- callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+ callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
}
processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
@@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations)
// Do BP of projData into tmpData
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+ callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
// Multiply volumeData with tmpData divided by pixel weights
processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims);
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index 74e8b12..b4321ba 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float* volData = (float*)D_volData;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
float fVal = 0.0f;
float fA = startAngle + 0.5f;
@@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// BP specifically for SART.
// It includes (free) weighting with voxel weight.
// It assumes the proj texture is set up _without_ padding, unlike regular BP.
-__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims)
+__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
const float fT = fNum / fDen;
const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// Weighted BP for use in fan beam FBP
// Each pixel/ray is weighted by 1/L^2 where L is the distance to the source.
-__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
else
- devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
cudaStreamCreate(&stream);
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
// only one angle
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
@@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims);
+ devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
}
@@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
@@ -498,7 +505,7 @@ int main()
copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
- FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs);
+ FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h
index e4e69b0..3ebe1e8 100644
--- a/cuda/2d/fan_bp.h
+++ b/cuda/2d/fan_bp.h
@@ -33,16 +33,19 @@ namespace astraCUDA {
_AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
_AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
_AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
}
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 635200f..d9f7325 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
float fA = startAngle + 0.5f;
const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
if (offsets) {
for (int angle = startAngle; angle < endAngle; ++angle)
@@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
-__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims)
+__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
- D_volData[Y*volPitch+X] += fVal;
+ D_volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets)
+ const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
{
// TODO: process angles block by block
assert(dims.iProjAngles <= g_MaxAngles);
@@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+ devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
else
- devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+ devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets)
+ const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
ret = BP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
subdims, angles + iAngle,
- TOffsets ? TOffsets + iAngle : 0);
+ TOffsets ? TOffsets + iAngle : 0,
+ fOutputScale);
if (!ret)
return false;
}
@@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets)
+ const float* angles, const float* TOffsets, float fOutputScale)
{
// Only one angle.
// We need to Clamp to the border pixels instead of to zero, because
@@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims);
+ devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -369,7 +372,7 @@ int main()
for (unsigned int i = 0; i < dims.iProjAngles; ++i)
angle[i] = i*(M_PI/dims.iProjAngles);
- BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0);
+ BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);
delete[] angle;
diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h
index eaeafd8..64bcd34 100644
--- a/cuda/2d/par_bp.h
+++ b/cuda/2d/par_bp.h
@@ -36,12 +36,12 @@ namespace astraCUDA {
_AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
const SDimensions& dims, const float* angles,
- const float* TOffsets);
+ const float* TOffsets, float fOutputScale);
_AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets);
+ const float* angles, const float* TOffsets, float fOutputScale);
}
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 29670c3..e5cb5bb 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -200,10 +200,10 @@ bool SART::iterate(unsigned int iterations)
// BP, mask, and add back
// TODO: Try putting the masking directly in the BP
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
+ callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f);
processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);
} else {
- callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
+ callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f);
}
if (useMinConstraint)
@@ -264,16 +264,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- unsigned int angle)
+ unsigned int angle, float outputScale)
{
if (angles) {
assert(!fanProjs);
return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, angles, TOffsets);
+ angle, dims, angles, TOffsets, outputScale);
} else {
assert(fanProjs);
return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, fanProjs);
+ angle, dims, fanProjs, outputScale);
}
}
diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h
index 6574a6f..7dcd641 100644
--- a/cuda/2d/sart.h
+++ b/cuda/2d/sart.h
@@ -59,7 +59,7 @@ protected:
unsigned int angle, float outputScale);
bool callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- unsigned int angle);
+ unsigned int angle, float outputScale);
// projection angle variables
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index a6194a5..162ee77 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -127,10 +127,10 @@ bool SIRT::precomputeWeights()
zeroVolumeData(D_pixelWeight, pixelPitch, dims);
if (useSinogramMask) {
- callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
+ callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f);
} else {
processSino<opSet>(D_projData, 1.0f, projPitch, dims);
- callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+ callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
}
processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
@@ -251,7 +251,7 @@ bool SIRT::iterate(unsigned int iterations)
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+ callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);