summaryrefslogtreecommitdiffstats
path: root/cuda/2d/fan_bp.cu
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-07-28 17:05:24 +0200
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-07-28 17:05:24 +0200
commitb2611a03577c285ddf48edab0d05dafa09ab362c (patch)
treec1d2f1b5166ba23f55e68e8faf0832f7c540f787 /cuda/2d/fan_bp.cu
parent1ff4a270a7df1edb54dd91fe653d6a936b959b3a (diff)
parent53249b3ad63f0d08b9862a75602acf263d230d77 (diff)
downloadastra-b2611a03577c285ddf48edab0d05dafa09ab362c.tar.gz
astra-b2611a03577c285ddf48edab0d05dafa09ab362c.tar.bz2
astra-b2611a03577c285ddf48edab0d05dafa09ab362c.tar.xz
astra-b2611a03577c285ddf48edab0d05dafa09ab362c.zip
Merge branch 'master' into parvec
Diffstat (limited to 'cuda/2d/fan_bp.cu')
-rw-r--r--cuda/2d/fan_bp.cu47
1 files changed, 27 insertions, 20 deletions
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index 74e8b12..b4321ba 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float* volData = (float*)D_volData;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
float fVal = 0.0f;
float fA = startAngle + 0.5f;
@@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// BP specifically for SART.
// It includes (free) weighting with voxel weight.
// It assumes the proj texture is set up _without_ padding, unlike regular BP.
-__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims)
+__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
const float fT = fNum / fDen;
const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// Weighted BP for use in fan beam FBP
// Each pixel/ray is weighted by 1/L^2 where L is the distance to the source.
-__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
else
- devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
cudaStreamCreate(&stream);
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
// only one angle
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
@@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims);
+ devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
}
@@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
@@ -498,7 +505,7 @@ int main()
copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
- FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs);
+ FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);