diff options
Diffstat (limited to 'cuda')
-rw-r--r-- | cuda/2d/algo.cu | 7 | ||||
-rw-r--r-- | cuda/2d/algo.h | 3 | ||||
-rw-r--r-- | cuda/2d/astra.cu | 12 | ||||
-rw-r--r-- | cuda/2d/cgls.cu | 4 | ||||
-rw-r--r-- | cuda/2d/em.cu | 4 | ||||
-rw-r--r-- | cuda/2d/fan_bp.cu | 47 | ||||
-rw-r--r-- | cuda/2d/fan_bp.h | 9 | ||||
-rw-r--r-- | cuda/2d/par_bp.cu | 31 | ||||
-rw-r--r-- | cuda/2d/par_bp.h | 4 | ||||
-rw-r--r-- | cuda/2d/sart.cu | 10 | ||||
-rw-r--r-- | cuda/2d/sart.h | 2 | ||||
-rw-r--r-- | cuda/2d/sirt.cu | 6 | ||||
-rw-r--r-- | cuda/3d/algo3d.cu | 18 | ||||
-rw-r--r-- | cuda/3d/algo3d.h | 9 | ||||
-rw-r--r-- | cuda/3d/astra3d.cu | 1005 | ||||
-rw-r--r-- | cuda/3d/astra3d.h | 215 | ||||
-rw-r--r-- | cuda/3d/cgls3d.cu | 6 | ||||
-rw-r--r-- | cuda/3d/cone_bp.cu | 26 | ||||
-rw-r--r-- | cuda/3d/cone_bp.h | 7 | ||||
-rw-r--r-- | cuda/3d/mem3d.cu | 270 | ||||
-rw-r--r-- | cuda/3d/mem3d.h | 99 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.cu | 25 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.h | 6 | ||||
-rw-r--r-- | cuda/3d/sirt3d.cu | 8 |
24 files changed, 856 insertions, 977 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index 144fabd..dc74e51 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch, } bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch, - float* D_projData, unsigned int projPitch) + float* D_projData, unsigned int projPitch, + float outputScale) { if (angles) { assert(!fanProjs); return BP(D_volumeData, volumePitch, D_projData, projPitch, - dims, angles, TOffsets); + dims, angles, TOffsets, outputScale); } else { assert(fanProjs); return FanBP(D_volumeData, volumePitch, D_projData, projPitch, - dims, fanProjs); + dims, fanProjs, outputScale); } } diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index a75905e..99959c8 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -118,7 +118,8 @@ protected: float* D_projData, unsigned int projPitch, float outputScale); bool callBP(float* D_volumeData, unsigned int volumePitch, - float* D_projData, unsigned int projPitch); + float* D_projData, unsigned int projPitch, + float outputScale); SDimensions dims; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 2f72db0..4c69628 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -368,21 +368,19 @@ bool AstraFBP::run() } + float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; + if (pData->bFanBeam) { - ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections); + ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale); } else { - ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets); + ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale); } if(!ok) { return false; } - processVol<opMul>(pData->D_volumeData, - (M_PI / 2.0f) / (float)pData->dims.iProjAngles, - pData->volumePitch, pData->dims); - return true; } @@ -594,7 +592,7 @@ bool BPalgo::iterate(unsigned int) { // TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant zeroVolumeData(D_volumeData, volumePitch, dims); - callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch); + callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f); return true; } diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index 9ead563..f402914 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations) // p = A'*r zeroVolumeData(D_p, pPitch, dims); - callBP(D_p, pPitch, D_r, rPitch); + callBP(D_p, pPitch, D_r, rPitch, 1.0f); if (useVolumeMask) processVol<opMul>(D_p, D_maskData, pPitch, dims); @@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations) // z = A'*r zeroVolumeData(D_z, zPitch, dims); - callBP(D_z, zPitch, D_r, rPitch); + callBP(D_z, zPitch, D_r, rPitch, 1.0f); if (useVolumeMask) processVol<opMul>(D_z, D_maskData, zPitch, dims); diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 00127c0..8593b08 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -102,7 +102,7 @@ bool EM::precomputeWeights() #endif { processSino<opSet>(D_projData, 1.0f, projPitch, dims); - callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); + callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f); } processVol<opInvert>(D_pixelWeight, pixelPitch, dims); @@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations) // Do BP of projData into tmpData zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP(D_tmpData, tmpPitch, D_projData, projPitch); + callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); // Multiply volumeData with tmpData divided by pixel weights processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims); diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index 74e8b12..b4321ba 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s fA += 1.0f; } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in float* volData = (float*)D_volData; + fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + float fVal = 0.0f; float fA = startAngle + 0.5f; @@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in fA += 1.0f; } - volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + volData[Y*volPitch+X] += fVal * fOutputScale; } // BP specifically for SART. // It includes (free) weighting with voxel weight. // It assumes the proj texture is set up _without_ padding, unlike regular BP. -__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims) +__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi const float fT = fNum / fDen; const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f); - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // Weighted BP for use in fan beam FBP // Each pixel/ray is weighted by 1/L^2 where L is the distance to the source. -__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un fA += 1.0f; } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { assert(dims.iProjAngles <= g_MaxAngles); @@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); + devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); else - devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); + devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch, bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { assert(dims.iProjAngles <= g_MaxAngles); @@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, cudaStreamCreate(&stream); for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { - devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); + devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch, bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { // only one angle bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); @@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims); + devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); @@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, bool FanBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch, bool ret; ret = FanBP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; } @@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch, bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles) + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, bool ret; ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; @@ -498,7 +505,7 @@ int main() copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch); - FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs); + FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f); copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h index e4e69b0..3ebe1e8 100644 --- a/cuda/2d/fan_bp.h +++ b/cuda/2d/fan_bp.h @@ -33,16 +33,19 @@ namespace astraCUDA { _AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); _AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); _AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const SFanProjection* angles); + const SDimensions& dims, const SFanProjection* angles, + float fOutputScale); } diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index 635200f..d9f7325 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star } - volData[Y*volPitch+X] += fVal; + volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s float fA = startAngle + 0.5f; const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; + fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + if (offsets) { for (int angle = startAngle; angle < endAngle; ++angle) @@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s } - volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); + volData[Y*volPitch+X] += fVal * fOutputScale; } -__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims) +__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset; const float fVal = tex2D(gT_projTexture, fT, 0.5f); - D_volData[Y*volPitch+X] += fVal; + D_volData[Y*volPitch+X] += fVal * fOutputScale; } bool BP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets) + const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) { // TODO: process angles block by block assert(dims.iProjAngles <= g_MaxAngles); @@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); + devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); else - devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); + devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); } cudaThreadSynchronize(); @@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets) + const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch, ret = BP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, subdims, angles + iAngle, - TOffsets ? TOffsets + iAngle : 0); + TOffsets ? TOffsets + iAngle : 0, + fOutputScale); if (!ret) return false; } @@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch, bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets) + const float* angles, const float* TOffsets, float fOutputScale) { // Only one angle. // We need to Clamp to the border pixels instead of to zero, because @@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch, dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims); + devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); @@ -369,7 +372,7 @@ int main() for (unsigned int i = 0; i < dims.iProjAngles; ++i) angle[i] = i*(M_PI/dims.iProjAngles); - BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0); + BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f); delete[] angle; diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index eaeafd8..64bcd34 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -36,12 +36,12 @@ namespace astraCUDA { _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, const SDimensions& dims, const float* angles, - const float* TOffsets); + const float* TOffsets, float fOutputScale); _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets); + const float* angles, const float* TOffsets, float fOutputScale); } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 29670c3..e5cb5bb 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -200,10 +200,10 @@ bool SART::iterate(unsigned int iterations) // BP, mask, and add back // TODO: Try putting the masking directly in the BP zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle); + callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, 1.0f); processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims); } else { - callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle); + callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, 1.0f); } if (useMinConstraint) @@ -264,16 +264,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch, bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - unsigned int angle) + unsigned int angle, float outputScale) { if (angles) { assert(!fanProjs); return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, angles, TOffsets); + angle, dims, angles, TOffsets, outputScale); } else { assert(fanProjs); return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, fanProjs); + angle, dims, fanProjs, outputScale); } } diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h index 6574a6f..7dcd641 100644 --- a/cuda/2d/sart.h +++ b/cuda/2d/sart.h @@ -59,7 +59,7 @@ protected: unsigned int angle, float outputScale); bool callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - unsigned int angle); + unsigned int angle, float outputScale); // projection angle variables diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index a6194a5..162ee77 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -127,10 +127,10 @@ bool SIRT::precomputeWeights() zeroVolumeData(D_pixelWeight, pixelPitch, dims); if (useSinogramMask) { - callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); + callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f); } else { processSino<opSet>(D_projData, 1.0f, projPitch, dims); - callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); + callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f); } processVol<opInvert>(D_pixelWeight, pixelPitch, dims); @@ -251,7 +251,7 @@ bool SIRT::iterate(unsigned int iterations) zeroVolumeData(D_tmpData, tmpPitch, dims); - callBP(D_tmpData, tmpPitch, D_projData, projPitch); + callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims); diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index 7f61280..cc86b70 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -41,6 +41,7 @@ ReconAlgo3D::ReconAlgo3D() coneProjs = 0; par3DProjs = 0; shouldAbort = false; + fOutputScale = 1.0f; } ReconAlgo3D::~ReconAlgo3D() @@ -57,9 +58,10 @@ void ReconAlgo3D::reset() shouldAbort = false; } -bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles) +bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale) { dims = _dims; + fOutputScale = _outputScale; coneProjs = new SConeProjection[dims.iProjAngles]; par3DProjs = 0; @@ -69,9 +71,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject return true; } -bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles) +bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale) { dims = _dims; + fOutputScale = _outputScale; par3DProjs = new SPar3DProjection[dims.iProjAngles]; coneProjs = 0; @@ -87,19 +90,20 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData, float outputScale) { if (coneProjs) { - return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale); + return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); } else { - return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale); + return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); } } bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData, - cudaPitchedPtr& D_projData) + cudaPitchedPtr& D_projData, + float outputScale) { if (coneProjs) { - return ConeBP(D_volumeData, D_projData, dims, coneProjs); + return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); } else { - return Par3DBP(D_volumeData, D_projData, dims, par3DProjs); + return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); } } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index f4c6a87..886b092 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -39,8 +39,8 @@ public: ReconAlgo3D(); ~ReconAlgo3D(); - bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs); - bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs); + bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale); + bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale); void signalAbort() { shouldAbort = true; } @@ -51,12 +51,15 @@ protected: cudaPitchedPtr& D_projData, float outputScale); bool callBP(cudaPitchedPtr& D_volumeData, - cudaPitchedPtr& D_projData); + cudaPitchedPtr& D_projData, + float outputScale); SDimensions3D dims; SConeProjection* coneProjs; SPar3DProjection* par3DProjs; + float fOutputScale; + volatile bool shouldAbort; }; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 0b9c70b..8328229 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -40,6 +40,12 @@ $Id$ #include "arith3d.h" #include "astra3d.h" +#include "astra/ParallelProjectionGeometry3D.h" +#include "astra/ParallelVecProjectionGeometry3D.h" +#include "astra/ConeProjectionGeometry3D.h" +#include "astra/ConeVecProjectionGeometry3D.h" +#include "astra/VolumeGeometry3D.h" + #include <iostream> using namespace astraCUDA3d; @@ -52,86 +58,200 @@ enum CUDAProjectionType3d { }; -static SConeProjection* genConeProjections(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - double fOriginSourceDistance, - double fOriginDetectorDistance, - double fDetUSize, - double fDetVSize, - const float *pfAngles) -{ - SConeProjection base; - base.fSrcX = 0.0f; - base.fSrcY = -fOriginSourceDistance; - base.fSrcZ = 0.0f; - base.fDetSX = iProjU * fDetUSize * -0.5f; - base.fDetSY = fOriginDetectorDistance; - base.fDetSZ = iProjV * fDetVSize * -0.5f; - base.fDetUX = fDetUSize; - base.fDetUY = 0.0f; - base.fDetUZ = 0.0f; - base.fDetVX = 0.0f; - base.fDetVY = 0.0f; - base.fDetVZ = fDetVSize; - SConeProjection* p = new SConeProjection[iProjAngles]; +// adjust pProjs to normalize volume geometry +template<typename ProjectionT> +static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, + unsigned int iProjectionAngleCount, + ProjectionT*& pProjs, + float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjs); + + // TODO: Relative instead of absolute + const float EPS = 0.00001f; + if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS) + return false; + if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS) + return false; -#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0) - for (unsigned int i = 0; i < iProjAngles; ++i) { - ROTATE0(Src, i, pfAngles[i]); - ROTATE0(DetS, i, pfAngles[i]); - ROTATE0(DetU, i, pfAngles[i]); - ROTATE0(DetV, i, pfAngles[i]); + // Translate + float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; + float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; + float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2; + + float factor = 1.0f / pVolGeom->getPixelLengthX(); + + for (int i = 0; i < iProjectionAngleCount; ++i) { + // CHECKME: Order of scaling and translation + pProjs[i].translate(dx, dy, dz); + pProjs[i].scale(factor); } -#undef ROTATE0 + // CHECKME: Check factor + fOutputScale *= pVolGeom->getPixelLengthX(); + + return true; +} + + +bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + SDimensions3D& dims) +{ + dims.iVolX = pVolGeom->getGridColCount(); + dims.iVolY = pVolGeom->getGridRowCount(); + dims.iVolZ = pVolGeom->getGridSliceCount(); + dims.iProjAngles = pProjGeom->getProjectionCount(); + dims.iProjU = pProjGeom->getDetectorColCount(), + dims.iProjV = pProjGeom->getDetectorRowCount(), + dims.iRaysPerDetDim = 1; + dims.iRaysPerVoxelDim = 1; + + if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0) + return false; + if (dims.iProjAngles <= 0 || dims.iProjU <= 0 || dims.iProjV <= 0) + return false; - return p; + return true; } -static SPar3DProjection* genPar3DProjections(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - double fDetUSize, - double fDetVSize, - const float *pfAngles) + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CParallelProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pProjs, float& fOutputScale) { - SPar3DProjection base; - base.fRayX = 0.0f; - base.fRayY = 1.0f; - base.fRayZ = 0.0f; + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionAngles()); - base.fDetSX = iProjU * fDetUSize * -0.5f; - base.fDetSY = 0.0f; - base.fDetSZ = iProjV * fDetVSize * -0.5f; + int nth = pProjGeom->getProjectionCount(); - base.fDetUX = fDetUSize; - base.fDetUY = 0.0f; - base.fDetUZ = 0.0f; + pProjs = genPar3DProjections(nth, + pProjGeom->getDetectorColCount(), + pProjGeom->getDetectorRowCount(), + pProjGeom->getDetectorSpacingX(), + pProjGeom->getDetectorSpacingY(), + pProjGeom->getProjectionAngles()); - base.fDetVX = 0.0f; - base.fDetVY = 0.0f; - base.fDetVZ = fDetVSize; + bool ok; - SPar3DProjection* p = new SPar3DProjection[iProjAngles]; + fOutputScale = 1.0f; -#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); p[i].f##name##Z = base.f##name##Z; } while(0) + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); - for (unsigned int i = 0; i < iProjAngles; ++i) { - ROTATE0(Ray, i, pfAngles[i]); - ROTATE0(DetS, i, pfAngles[i]); - ROTATE0(DetU, i, pfAngles[i]); - ROTATE0(DetV, i, pfAngles[i]); - } + return ok; +} + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CParallelVecProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionVectors()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = new SPar3DProjection[nth]; + for (int i = 0; i < nth; ++i) + pProjs[i] = pProjGeom->getProjectionVectors()[i]; + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, + SConeProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionAngles()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = genConeProjections(nth, + pProjGeom->getDetectorColCount(), + pProjGeom->getDetectorRowCount(), + pProjGeom->getOriginSourceDistance(), + pProjGeom->getOriginDetectorDistance(), + pProjGeom->getDetectorSpacingX(), + pProjGeom->getDetectorSpacingY(), + pProjGeom->getProjectionAngles()); + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CConeVecProjectionGeometry3D* pProjGeom, + SConeProjection*& pProjs, float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionVectors()); + + int nth = pProjGeom->getProjectionCount(); + + pProjs = new SConeProjection[nth]; + for (int i = 0; i < nth; ++i) + pProjs[i] = pProjGeom->getProjectionVectors()[i]; + + bool ok; + + fOutputScale = 1.0f; + + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + + return ok; +} + + +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pParProjs, + SConeProjection*& pConeProjs, + float& fOutputScale) +{ + const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom); + const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom); + const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom); + const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom); + + pConeProjs = 0; + pParProjs = 0; -#undef ROTATE0 + bool ok; - return p; + if (conegeom) { + ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale); + } else if (conevec3dgeom) { + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale); + } else if (par3dgeom) { + ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale); + } else if (parvec3dgeom) { + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale); + } else { + ok = false; + } + + return ok; } @@ -151,7 +271,7 @@ public: SConeProjection* projs; SPar3DProjection* parprojs; - float fPixelSize; + float fOutputScale; bool initialized; bool setStartReconstruction; @@ -188,6 +308,8 @@ AstraSIRT3d::AstraSIRT3d() pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; + pData->parprojs = 0; + pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -220,127 +342,37 @@ AstraSIRT3d::~AstraSIRT3d() pData = 0; } -bool AstraSIRT3d::setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/) -{ - if (pData->initialized) - return false; - - pData->dims.iVolX = iVolX; - pData->dims.iVolY = iVolY; - pData->dims.iVolZ = iVolZ; - - return (iVolX > 0 && iVolY > 0 && iVolZ > 0); -} - - -bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs) -{ - if (pData->initialized) - return false; - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) - return false; - - pData->parprojs = new SPar3DProjection[iProjAngles]; - memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0])); - - pData->projType = PROJ_PARALLEL; - - return true; -} - -bool AstraSIRT3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles) +bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom) { if (pData->initialized) return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->parprojs = p; - pData->projType = PROJ_PARALLEL; + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); - return true; -} - - - -bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs) -{ - if (pData->initialized) + if (!ok) return false; - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; + pData->projs = 0; + pData->parprojs = 0; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pData->parprojs, pData->projs, + pData->fOutputScale); + if (!ok) return false; - pData->projs = new SConeProjection[iProjAngles]; - memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0])); - - pData->projType = PROJ_CONE; + if (pData->projs) { + assert(pData->parprojs == 0); + pData->projType = PROJ_CONE; + } else { + assert(pData->parprojs != 0); + pData->projType = PROJ_PARALLEL; + } return true; } -bool AstraSIRT3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->projs = p; - pData->projType = PROJ_CONE; - - return true; -} bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling, unsigned int iDetectorSuperSampling) @@ -404,9 +436,9 @@ bool AstraSIRT3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs); + ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); } else { - ok = pData->sirt.setConeGeometry(pData->dims, pData->projs); + ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); } if (!ok) @@ -618,7 +650,7 @@ public: SConeProjection* projs; SPar3DProjection* parprojs; - float fPixelSize; + float fOutputScale; bool initialized; bool setStartReconstruction; @@ -655,6 +687,8 @@ AstraCGLS3d::AstraCGLS3d() pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; + pData->parprojs = 0; + pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -687,125 +721,33 @@ AstraCGLS3d::~AstraCGLS3d() pData = 0; } -bool AstraCGLS3d::setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/) -{ - if (pData->initialized) - return false; - - pData->dims.iVolX = iVolX; - pData->dims.iVolY = iVolY; - pData->dims.iVolZ = iVolZ; - - return (iVolX > 0 && iVolY > 0 && iVolZ > 0); -} - - -bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs) -{ - if (pData->initialized) - return false; - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) - return false; - - pData->parprojs = new SPar3DProjection[iProjAngles]; - memcpy(pData->parprojs, projs, iProjAngles * sizeof(projs[0])); - - pData->projType = PROJ_PARALLEL; - - return true; -} - -bool AstraCGLS3d::setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->parprojs = p; - pData->projType = PROJ_PARALLEL; - - return true; -} - - - -bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs) +bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom) { if (pData->initialized) return false; - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, pData->dims); - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || projs == 0) + if (!ok) return false; - pData->projs = new SConeProjection[iProjAngles]; - memcpy(pData->projs, projs, iProjAngles * sizeof(projs[0])); - - pData->projType = PROJ_CONE; - - return true; -} - -bool AstraCGLS3d::setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles) -{ - if (pData->initialized) - return false; + pData->projs = 0; + pData->parprojs = 0; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pData->parprojs, pData->projs, + pData->fOutputScale); + if (!ok) return false; - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjU = iProjU; - pData->dims.iProjV = iProjV; - - pData->projs = p; - pData->projType = PROJ_CONE; + if (pData->projs) { + assert(pData->parprojs == 0); + pData->projType = PROJ_CONE; + } else { + assert(pData->parprojs != 0); + pData->projType = PROJ_PARALLEL; + } return true; } @@ -874,9 +816,9 @@ bool AstraCGLS3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs); + ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); } else { - ok = pData->cgls.setConeGeometry(pData->dims, pData->projs); + ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); } if (!ok) @@ -1077,179 +1019,31 @@ float AstraCGLS3d::computeDiffNorm() -bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaConeFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling); - - delete[] p; - - return ok; -} - -bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling) +bool astraCudaFP(const float* pfVolume, float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + int iGPUIndex, int iDetectorSuperSampling, + Cuda3DProjectionKernel projKernel) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) return false; dims.iRaysPerDetDim = iDetectorSuperSampling; - if (iDetectorSuperSampling == 0) return false; - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; - if (!ok) - return false; - - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyVolumeToDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - ok &= zeroProjectionData(D_projData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - ok &= ConeFP(D_volumeData, D_projData, dims, pfAngles, 1.0f); - - ok &= copyProjectionsFromDevice(pfProjections, D_projData, - dims, dims.iProjU); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - -bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling, - Cuda3DProjectionKernel projKernel) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaPar3DFP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iDetectorSuperSampling, - projKernel); - - delete[] p; - - return ok; -} - - -bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling, - Cuda3DProjectionKernel projKernel) -{ - SDimensions3D dims; - - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + float outputScale; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); - dims.iRaysPerDetDim = iDetectorSuperSampling; - - if (iDetectorSuperSampling == 0) - return false; if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1262,7 +1056,7 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + ok = D_volumeData.ptr; if (!ok) return false; @@ -1283,15 +1077,25 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, return false; } - switch (projKernel) { - case ker3d_default: - ok &= Par3DFP(D_volumeData, D_projData, dims, pfAngles, 1.0f); - break; - case ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pfAngles, 1.0f); - break; - default: - assert(false); + if (pParProjs) { + switch (projKernel) { + case ker3d_default: + ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale); + break; + case ker3d_sum_square_weights: + ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale); + break; + default: + assert(false); + } + } else { + switch (projKernel) { + case ker3d_default: + ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + break; + default: + assert(false); + } } ok &= copyProjectionsFromDevice(pfProjections, D_projData, @@ -1305,207 +1109,28 @@ bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, } -bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SConeProjection* p = genConeProjections(iProjAngles, - iProjU, iProjV, - fOriginSourceDistance, - fOriginDetectorDistance, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaConeBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); - delete[] p; - - return ok; -} - -bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) +bool astraCudaBP(float* pfVolume, const float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - dims.iRaysPerVoxelDim = iVoxelSuperSampling; - - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyProjectionsToDevice(pfProjections, D_projData, - dims, dims.iProjU); - - ok &= zeroVolumeData(D_volumeData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - ok &= ConeBP(D_volumeData, D_projData, dims, pfAngles); - - ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - -bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaPar3DBP(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); - - delete[] p; - - return ok; -} - -// This computes the column weights, divides by them, and adds the -// result to the current volume. This is both more expensive and more -// GPU memory intensive than the regular BP, but allows saving system RAM. -bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - - SPar3DProjection* p = genPar3DProjections(iProjAngles, - iProjU, iProjV, - fDetUSize, fDetVSize, - pfAngles); - - bool ok; - ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ, - iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); - - delete[] p; - - return ok; -} - - -bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling) -{ - SDimensions3D dims; - - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; + dims.iRaysPerVoxelDim = iVoxelSuperSampling; - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; + float outputScale; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1518,7 +1143,7 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + ok = D_volumeData.ptr; if (!ok) return false; @@ -1540,7 +1165,10 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, return false; } - ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles); + if (pParProjs) + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); + else + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1556,33 +1184,28 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, // This computes the column weights, divides by them, and adds the // result to the current volume. This is both more expensive and more // GPU memory intensive than the regular BP, but allows saving system RAM. -bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, +bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) return false; - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + dims.iRaysPerVoxelDim = iVoxelSuperSampling; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; + float outputScale; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1595,7 +1218,7 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims); - bool ok = D_pixelWeight.ptr; + ok = D_pixelWeight.ptr; if (!ok) return false; @@ -1617,7 +1240,12 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, // Compute weights ok &= zeroVolumeData(D_pixelWeight, dims); processSino3D<opSet>(D_projData, 1.0f, dims); - ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles); + + if (pParProjs) + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale); + else + ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale); + processVol3D<opInvert>(D_pixelWeight, dims); if (!ok) { cudaFree(D_pixelWeight.ptr); @@ -1630,7 +1258,11 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, dims, dims.iProjU); ok &= zeroVolumeData(D_volumeData, dims); // Do BP into D_volumeData - ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles); + if (pParProjs) + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); + else + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + // Multiply with weights processVol3D<opMul>(D_volumeData, D_pixelWeight, dims); @@ -1653,6 +1285,9 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, cudaFree(D_volumeData.ptr); cudaFree(D_projData.ptr); + delete[] pParProjs; + delete[] pConeProjs; + return ok; } @@ -1660,33 +1295,19 @@ bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, bool astraCudaFDK(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, + const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, bool bShortScan, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; - dims.iVolX = iVolX; - dims.iVolY = iVolY; - dims.iVolZ = iVolZ; - if (iVolX == 0 || iVolY == 0 || iVolZ == 0) - return false; + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - dims.iProjAngles = iProjAngles; - dims.iProjU = iProjU; - dims.iProjV = iProjV; + // TODO: Check that pVolGeom is normalized, since we don't support + // other volume geometries yet - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + if (!ok) return false; dims.iRaysPerVoxelDim = iVoxelSuperSampling; @@ -1703,9 +1324,8 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, return false; } - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - bool ok = D_volumeData.ptr; + ok = D_volumeData.ptr; if (!ok) return false; @@ -1726,6 +1346,13 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, return false; } + float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); + float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); + float fDetUSize = pProjGeom->getDetectorSpacingX(); + float fDetVSize = pProjGeom->getDetectorSpacingY(); + const float *pfAngles = pProjGeom->getProjectionAngles(); + + // TODO: Offer interface for SrcZ, DetZ ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index f91fe26..2782994 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -42,7 +42,12 @@ enum Cuda3DProjectionKernel { ker3d_sum_square_weights }; - +class CProjectionGeometry3D; +class CParallelProjectionGeometry3D; +class CParallelVecProjectionGeometry3D; +class CConeProjectionGeometry3D; +class CConeVecProjectionGeometry3D; +class CVolumeGeometry3D; class AstraSIRT3d_internal; @@ -52,37 +57,9 @@ public: AstraSIRT3d(); ~AstraSIRT3d(); - // Set the number of pixels in the reconstruction rectangle, - // and the length of the edge of a pixel. - // Volume pixels are assumed to be square. - // This must be called before setting the projection geometry. - bool setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/); - - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs); - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fSourceZ, - float fDetSize, - const float *pfAngles); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fSourceZ, - float fDetSize, - const float *pfAngles); + // Set the volume and projection geometry + bool setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom); // Enable supersampling. // @@ -197,37 +174,9 @@ public: AstraCGLS3d(); ~AstraCGLS3d(); - // Set the number of pixels in the reconstruction rectangle, - // and the length of the edge of a pixel. - // Volume pixels are assumed to be square. - // This must be called before setting the projection geometry. - bool setReconstructionGeometry(unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ/*, - float fPixelSize = 1.0f*/); - - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection* projs); - bool setConeGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fSourceZ, - float fDetSize, - const float *pfAngles); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection* projs); - bool setPar3DGeometry(unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fSourceZ, - float fDetSize, - const float *pfAngles); + // Set the volume and projection geometry + bool setGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom); // Enable supersampling. // @@ -332,140 +281,40 @@ protected: AstraCGLS3d_internal *pData; }; +bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + astraCUDA3d::SDimensions3D& dims); +bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, + SPar3DProjection*& pParProjs, + SConeProjection*& pConeProjs, + float& fOutputScale); -_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iDetectorSuperSampling); - -_AstraExport bool astraCudaConeFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling); - -_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, +_AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iDetectorSuperSampling, Cuda3DProjectionKernel projKernel); -_AstraExport bool astraCudaPar3DFP(const float* pfVolume, float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, - int iGPUIndex, int iDetectorSuperSampling, - Cuda3DProjectionKernel projKernel); - -_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaConeBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SConeProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, +_AstraExport bool astraCudaBP(float* pfVolume, const float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fDetUSize, - float fDetVSize, - const float *pfAngles, - int iGPUIndex, int iVoxelSuperSampling); - -_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - const SPar3DProjection *pfAngles, +_AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProjections, + const CVolumeGeometry3D* pVolGeom, + const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, - unsigned int iVolX, - unsigned int iVolY, - unsigned int iVolZ, - unsigned int iProjAngles, - unsigned int iProjU, - unsigned int iProjV, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetUSize, - float fDetVSize, - const float *pfAngles, + const CVolumeGeometry3D* pVolGeom, + const CConeProjectionGeometry3D* pProjGeom, bool bShortScan, int iGPUIndex, int iVoxelSuperSampling); + } diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index 5071a9b..dd0e8a0 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations) // p = A'*r zeroVolumeData(D_p, dims); - callBP(D_p, D_r); + callBP(D_p, D_r, 1.0f); if (useVolumeMask) processVol3D<opMul>(D_p, D_maskData, dims); @@ -195,7 +195,7 @@ bool CGLS::iterate(unsigned int iterations) // z = A'*r zeroVolumeData(D_z, dims); - callBP(D_z, D_r); + callBP(D_z, D_r, 1.0f); if (useVolumeMask) processVol3D<opMul>(D_z, D_maskData, dims); @@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData, CGLS cgls; bool ok = true; - ok &= cgls.setConeGeometry(dims, angles); + ok &= cgls.setConeGeometry(dims, angles, 1.0f); if (D_maskData.ptr) ok &= cgls.enableVolumeMask(); diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 5648d6f..4a41f6a 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array) //__launch_bounds__(32*16, 4) __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, - int angleOffset, const astraCUDA3d::SDimensions3D dims) + int angleOffset, const astraCUDA3d::SDimensions3D dims, + float fOutputScale) { float* volData = (float*)D_volData; @@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng endZ = dims.iVolZ - startZ; for(int i=0; i < endZ; i++) - volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; + volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale; } //End kernel // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + + for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) { @@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start } - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale; } } bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SConeProjection* angles) + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale) { bindProjDataTexture(D_projArray); @@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (dims.iRaysPerVoxelDim == 1) - dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SConeProjection* angles) + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles); + bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); cudaFreeArray(cuArray); @@ -473,7 +479,7 @@ int main() } #endif - astraCUDA3d::ConeBP(volData, projData, dims, angle); + astraCUDA3d::ConeBP(volData, projData, dims, angle, 1.0f); #if 0 float* buf = new float[256*256]; diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h index cba6d9f..4d3d2dd 100644 --- a/cuda/3d/cone_bp.h +++ b/cuda/3d/cone_bp.h @@ -33,13 +33,14 @@ namespace astraCUDA3d { _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SConeProjection* angles); + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale); _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SConeProjection* angles); + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale); - } #endif diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu new file mode 100644 index 0000000..6d81dc0 --- /dev/null +++ b/cuda/3d/mem3d.cu @@ -0,0 +1,270 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include <cstdio> +#include <cassert> + +#include "util3d.h" + +#include "mem3d.h" + +#include "astra3d.h" +#include "cone_fp.h" +#include "cone_bp.h" +#include "par3d_fp.h" +#include "par3d_bp.h" + +#include "astra/Logging.h" + + +namespace astraCUDA3d { + + +struct SMemHandle3D_internal +{ + cudaPitchedPtr ptr; + unsigned int nx; + unsigned int ny; + unsigned int nz; +}; + +size_t availableGPUMemory() +{ + size_t free, total; + cudaError_t err = cudaMemGetInfo(&free, &total); + if (err != cudaSuccess) + return 0; + return free; +} + +MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero) +{ + SMemHandle3D_internal hnd; + hnd.nx = x; + hnd.ny = y; + hnd.nz = z; + + size_t free = availableGPUMemory(); + + cudaError_t err; + err = cudaMalloc3D(&hnd.ptr, make_cudaExtent(sizeof(float)*x, y, z)); + + if (err != cudaSuccess) { + return MemHandle3D(); + } + + size_t free2 = availableGPUMemory(); + + ASTRA_DEBUG("Allocated %d x %d x %d on GPU. (Pre: %lu, post: %lu)", x, y, z, free, free2); + + + + if (zero == INIT_ZERO) { + err = cudaMemset3D(hnd.ptr, 0, make_cudaExtent(sizeof(float)*x, y, z)); + if (err != cudaSuccess) { + cudaFree(hnd.ptr.ptr); + return MemHandle3D(); + } + } + + MemHandle3D ret; + ret.d = boost::shared_ptr<SMemHandle3D_internal>(new SMemHandle3D_internal); + *ret.d = hnd; + + return ret; +} + +bool freeGPUMemory(MemHandle3D handle) +{ + size_t free = availableGPUMemory(); + cudaError_t err = cudaFree(handle.d->ptr.ptr); + size_t free2 = availableGPUMemory(); + + ASTRA_DEBUG("Freeing memory. (Pre: %lu, post: %lu)", free, free2); + + return err == cudaSuccess; +} + +bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos) +{ + ASTRA_DEBUG("Copying %d x %d x %d to GPU", pos.subnx, pos.subny, pos.subnz); + ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + cudaPitchedPtr s; + s.ptr = (void*)src; // const cast away + s.pitch = pos.pitch * sizeof(float); + s.xsize = pos.nx * sizeof(float); + s.ysize = pos.ny; + ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", s.pitch, s.xsize, s.ysize); + + cudaMemcpy3DParms p; + p.srcArray = 0; + p.srcPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz); + p.srcPtr = s; + + p.dstArray = 0; + p.dstPos = make_cudaPos(0, 0, 0); + p.dstPtr = dst.d->ptr; + + p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz); + + p.kind = cudaMemcpyHostToDevice; + + cudaError_t err = cudaMemcpy3D(&p); + + return err == cudaSuccess; +} + + +bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) +{ + ASTRA_DEBUG("Copying %d x %d x %d from GPU", pos.subnx, pos.subny, pos.subnz); + ASTRA_DEBUG("Offset %d,%d,%d", pos.subx, pos.suby, pos.subz); + cudaPitchedPtr d; + d.ptr = (void*)dst; + d.pitch = pos.pitch * sizeof(float); + d.xsize = pos.nx * sizeof(float); + d.ysize = pos.ny; + ASTRA_DEBUG("Pitch %d, xsize %d, ysize %d", d.pitch, d.xsize, d.ysize); + + cudaMemcpy3DParms p; + p.srcArray = 0; + p.srcPos = make_cudaPos(0, 0, 0); + p.srcPtr = src.d->ptr; + + p.dstArray = 0; + p.dstPos = make_cudaPos(pos.subx * sizeof(float), pos.suby, pos.subz); + p.dstPtr = d; + + p.extent = make_cudaExtent(pos.subnx * sizeof(float), pos.subny, pos.subnz); + + p.kind = cudaMemcpyDeviceToHost; + + cudaError_t err = cudaMemcpy3D(&p); + + return err == cudaSuccess; + +} + + +bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel) +{ + SDimensions3D dims; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + +#if 1 + dims.iRaysPerDetDim = iDetectorSuperSampling; + if (iDetectorSuperSampling == 0) + return false; +#else + dims.iRaysPerDetDim = 1; + astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default; +#endif + + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + float outputScale = 1.0f; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); + + if (pParProjs) { +#if 0 + for (int i = 0; i < dims.iProjAngles; ++i) { + ASTRA_DEBUG("Vec: %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f\n", + pParProjs[i].fRayX, pParProjs[i].fRayY, pParProjs[i].fRayZ, + pParProjs[i].fDetSX, pParProjs[i].fDetSY, pParProjs[i].fDetSZ, + pParProjs[i].fDetUX, pParProjs[i].fDetUY, pParProjs[i].fDetUZ, + pParProjs[i].fDetVX, pParProjs[i].fDetVY, pParProjs[i].fDetVZ); + } +#endif + + switch (projKernel) { + case astra::ker3d_default: + ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + break; + case astra::ker3d_sum_square_weights: + ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale); + break; + default: + ok = false; + } + } else { + switch (projKernel) { + case astra::ker3d_default: + ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + break; + default: + ok = false; + } + } + + return ok; +} + +bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling) +{ + SDimensions3D dims; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + +#if 1 + dims.iRaysPerVoxelDim = iVoxelSuperSampling; +#else + dims.iRaysPerVoxelDim = 1; +#endif + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + float outputScale = 1.0f; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + outputScale); + + if (pParProjs) + ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + else + ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + + return ok; + +} + + + + +} diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h new file mode 100644 index 0000000..82bad19 --- /dev/null +++ b/cuda/3d/mem3d.h @@ -0,0 +1,99 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +*/ + +#ifndef _CUDA_MEM3D_H +#define _CUDA_MEM3D_H + +#include <boost/shared_ptr.hpp> + +#include "astra3d.h" + +namespace astra { +class CVolumeGeometry3D; +class CProjectionGeometry3D; +} + +namespace astraCUDA3d { + +// TODO: Make it possible to delete these handles when they're no longer +// necessary inside the FP/BP +// +// TODO: Add functions for querying capacity + +struct SMemHandle3D_internal; + +struct MemHandle3D { + boost::shared_ptr<SMemHandle3D_internal> d; + operator bool() const { return (bool)d; } +}; + +struct SSubDimensions3D { + unsigned int nx; + unsigned int ny; + unsigned int nz; + unsigned int pitch; + unsigned int subnx; + unsigned int subny; + unsigned int subnz; + unsigned int subx; + unsigned int suby; + unsigned int subz; +}; + +/* +// Useful or not? +enum Mem3DCopyMode { + MODE_SET, + MODE_ADD +}; +*/ + +enum Mem3DZeroMode { + INIT_NO, + INIT_ZERO +}; + +size_t availableGPUMemory(); + +MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero); + +bool copyToGPUMemory(const float *src, MemHandle3D dst, const SSubDimensions3D &pos); + +bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos); + +bool freeGPUMemory(MemHandle3D handle); + + +bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); + +bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); + + + +} + +#endif diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 0c33280..cafab46 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(const cudaArray* array) } -__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -139,11 +139,11 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn endZ = dims.iVolZ - startZ; for(int i=0; i < endZ; i++) - volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; + volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale; } // supersampling version -__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -180,6 +180,9 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + + for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) { @@ -217,14 +220,15 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star } - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale; } } bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SPar3DProjection* angles) + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale) { bindProjDataTexture(D_projArray); @@ -271,9 +275,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (dims.iRaysPerVoxelDim == 1) - dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -288,14 +292,15 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SPar3DProjection* angles) + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles); + bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); cudaFreeArray(cuArray); @@ -445,7 +450,7 @@ int main() cudaMemcpy3D(&p); } - astraCUDA3d::Par3DBP(volData, projData, dims, angle); + astraCUDA3d::Par3DBP(volData, projData, dims, angle, 1.0f); #if 1 float* buf = new float[256*256]; diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h index ece37d1..f1fc62d 100644 --- a/cuda/3d/par3d_bp.h +++ b/cuda/3d/par3d_bp.h @@ -33,11 +33,13 @@ namespace astraCUDA3d { _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SPar3DProjection* angles); + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale); _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SPar3DProjection* angles); + const SDimensions3D& dims, const SPar3DProjection* angles, + float fOutputScale); } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 389ee6b..484521e 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -160,10 +160,10 @@ bool SIRT::precomputeWeights() zeroVolumeData(D_pixelWeight, dims); if (useSinogramMask) { - callBP(D_pixelWeight, D_smaskData); + callBP(D_pixelWeight, D_smaskData, 1.0f); } else { processSino3D<opSet>(D_projData, 1.0f, dims); - callBP(D_pixelWeight, D_projData); + callBP(D_pixelWeight, D_projData, 1.0f); } #if 0 float* bufp = new float[512*512]; @@ -293,7 +293,7 @@ bool SIRT::iterate(unsigned int iterations) #endif - callBP(D_tmpData, D_projData); + callBP(D_tmpData, D_projData, 1.0f); #if 0 printf("Dumping tmpData: %p\n", (void*)D_tmpData.ptr); float* buf = new float[256*256]; @@ -347,7 +347,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData, SIRT sirt; bool ok = true; - ok &= sirt.setConeGeometry(dims, angles); + ok &= sirt.setConeGeometry(dims, angles, 1.0f); if (D_maskData.ptr) ok &= sirt.enableVolumeMask(); |