diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2015-03-12 14:28:03 +0100 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2015-03-12 14:28:03 +0100 |
commit | 9458268a8b9192af98fc1b88bf0a5fbbc7696a77 (patch) | |
tree | e0f4dc2bfa82ee9770958de4a39b586de0b5e4ef /cuda/2d | |
parent | 1a7dfb0964fa7686aa01f0d836f95910dc4dc07f (diff) | |
download | astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.tar.gz astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.tar.bz2 astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.tar.xz astra-9458268a8b9192af98fc1b88bf0a5fbbc7696a77.zip |
Add outputScale argument to 2D CUDA BP
Diffstat (limited to 'cuda/2d')
-rw-r--r-- | cuda/2d/algo.cu | 7 | ||||
-rw-r--r-- | cuda/2d/algo.h | 3 | ||||
-rw-r--r-- | cuda/2d/astra.cu | 12 | ||||
-rw-r--r-- | cuda/2d/cgls.cu | 4 | ||||
-rw-r--r-- | cuda/2d/em.cu | 4 | ||||
-rw-r--r-- | cuda/2d/fan_bp.cu | 47 | ||||
-rw-r--r-- | cuda/2d/fan_bp.h | 9 | ||||
-rw-r--r-- | cuda/2d/par_bp.cu | 31 | ||||
-rw-r--r-- | cuda/2d/par_bp.h | 4 | ||||
-rw-r--r-- | cuda/2d/sart.cu | 10 | ||||
-rw-r--r-- | cuda/2d/sart.h | 2 | ||||
-rw-r--r-- | cuda/2d/sirt.cu | 6 |
12 files changed, 76 insertions, 63 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index 144fabd..dc74e51 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch, } bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch, - float* D_projData, unsigned int projPitch) + float* D_projData, unsigned int projPitch, + float outputScale) { if (angles) { assert(!fanProjs); return BP(D_volumeData, volumePitch, D_projData, projPitch, - dims, angles, TOffsets); + dims, angles, TOffsets, outputScale); } else { assert(fanProjs); return FanBP(D_volumeData, volumePitch, D_projData, projPitch, - dims, fanProjs); + dims, fanProjs, outputScale); } } diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index a75905e..99959c8 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -118,7 +118,8 @@ protected: float* D_projData, unsigned int projPitch, float outputScale); bool callBP(float* D_volumeData, unsigned int volumePitch, - float* D_projData, unsigned int projPitch); + float* D_projData, unsigned int projPitch, + float outputScale); SDimensions dims; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 0b5be06..5e2a07a 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -367,21 +367,19 @@ bool AstraFBP::run() } + float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; + if (pData->bFanBeam) { - ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections); + ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale); } else { - ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets); + ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale); } if(!ok) { return false; } - processVol<opMul>(pData->D_volumeData, - (M_PI / 2.0f) / (float)pData->dims.iProjAngles, - pData->volumePitch, pData->dims); - return true; } @@ -593,7 +591,7 @@ bool BPalgo::iterate(unsigned int) { // TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant zeroVolumeData(D_volumeData, volumePitch, dims); - callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch); + callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f); return true; } diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index 9ead563..f402914 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations) // p = A'*r zeroVolumeData(D_p, pPitch, dims); - callBP(D_p, pPitch, D_r, rPitch); + callBP(D_p, pPitch, D_r, rPitch, 1.0f); if (useVolumeMask) processVol<opMul>(D_p, D_maskData, pPitch, dims); @@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations) // z = A'*r zeroVolumeData(D_z, zPitch, dims); - callBP(D_z, zPitch, D_r, rPitch); + callBP(D_z, zPitch, D_r, rPitch, 1.0f); if (useVolumeMask) processVol<opMul>(D_z, D_maskData, zPitch, dims); diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 00127c0..8593b08 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -102,7 +102,7 @@ bool EM::precomputeWeights() #endif { processSino<opSet>(D_projData, 1.0f, projPitch, dims); - callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); + callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f); } processVol<opInvert>(D_pixelWeight, pixelPitch, dims); @@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations) // Do BP of projData into tmpData zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP(D_tmpData, tmpPitch, D_projData, projPitch); + callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); // Multiply volumeData with tmpData divided by pixel weights processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims); diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index 74e8b12..b4321ba 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s fA += 1.0f; } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in float* volData = (float*)D_volData; + fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + float fVal = 0.0f; float fA = startAngle + 0.5f; @@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in fA += 1.0f; } - volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + volData[Y*volPitch+X] += fVal * fOutputScale; } // BP specifically for SART. // It includes (free) weighting with voxel weight. // It assumes the proj texture is set up _without_ padding, unlike regular BP. -__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims) +__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi const float fT = fNum / fDen; const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f); - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // Weighted BP for use in fan beam FBP // Each pixel/ray is weighted by 1/L^2 where L is the distance to the source. -__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un fA += 1.0f; } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { assert(dims.iProjAngles <= g_MaxAngles); @@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); + devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); else - devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); + devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { assert(dims.iProjAngles <= g_MaxAngles); @@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, cudaStreamCreate(&stream); for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { - devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); + devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { // only one angle bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); @@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims); + devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); @@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, bool FanBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch, bool ret; ret = FanBP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; } @@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch, bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, bool ret; ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; @@ -498,7 +505,7 @@ int main() copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch); - FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs); + FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f); copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h index e4e69b0..3ebe1e8 100644 --- a/cuda/2d/fan_bp.h +++ b/cuda/2d/fan_bp.h @@ -33,16 +33,19 @@ namespace astraCUDA { _AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); _AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); _AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); } diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index 635200f..d9f7325 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s float fA = startAngle + 0.5f; const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; + fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + if (offsets) { for (int angle = startAngle; angle < endAngle; ++angle) @@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s } - volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + volData[Y*volPitch+X] += fVal * fOutputScale; } -__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims) +__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset; const float fVal = tex2D(gT_projTexture, fT, 0.5f); - D_volData[Y*volPitch+X] += fVal; + D_volData[Y*volPitch+X] += fVal * fOutputScale; } bool BP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets) + const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) { // TODO: process angles block by block assert(dims.iProjAngles <= g_MaxAngles); @@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); + devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); else - devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); + devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); } cudaThreadSynchronize(); @@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets) + const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch, ret = BP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, subdims, angles + iAngle, - TOffsets ? TOffsets + iAngle : 0); + TOffsets ? TOffsets + iAngle : 0, + fOutputScale); if (!ret) return false; } @@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch, bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets) + const float* angles, const float* TOffsets, float fOutputScale) { // Only one angle. // We need to Clamp to the border pixels instead of to zero, because @@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims); + devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); @@ -369,7 +372,7 @@ int main() for (unsigned int i = 0; i < dims.iProjAngles; ++i) angle[i] = i*(M_PI/dims.iProjAngles); - BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0); + BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f); delete[] angle; diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index eaeafd8..64bcd34 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -36,12 +36,12 @@ namespace astraCUDA { _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, const SDimensions& dims, const float* angles, - const float* TOffsets); + const float* TOffsets, float fOutputScale); _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets); + const float* angles, const float* TOffsets, float fOutputScale); } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 29670c3..e5cb5bb 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -200,10 +200,10 @@ bool SART::iterate(unsigned int iterations) // BP, mask, and add back // TODO: Try putting the masking directly in the BP zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle); + callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f); processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims); } else { - callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle); + callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f); } if (useMinConstraint) @@ -264,16 +264,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch, bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - unsigned int angle) + unsigned int angle, float outputScale) { if (angles) { assert(!fanProjs); return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, angles, TOffsets); + angle, dims, angles, TOffsets, outputScale); } else { assert(fanProjs); return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, fanProjs); + angle, dims, fanProjs, outputScale); } } diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h index 6574a6f..7dcd641 100644 --- a/cuda/2d/sart.h +++ b/cuda/2d/sart.h @@ -59,7 +59,7 @@ protected: unsigned int angle, float outputScale); bool callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - unsigned int angle); + unsigned int angle, float outputScale); // projection angle variables diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index a6194a5..162ee77 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -127,10 +127,10 @@ bool SIRT::precomputeWeights() zeroVolumeData(D_pixelWeight, pixelPitch, dims); if (useSinogramMask) { - callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); + callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f); } else { processSino<opSet>(D_projData, 1.0f, projPitch, dims); - callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); + callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f); } processVol<opInvert>(D_pixelWeight, pixelPitch, dims); @@ -251,7 +251,7 @@ bool SIRT::iterate(unsigned int iterations) zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP(D_tmpData, tmpPitch, D_projData, projPitch); + callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims); |