From bcff7884bb89dbecd394c75a8f57b0a54a743b52 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 16 Apr 2014 11:13:33 +0000 Subject: Change allocate/zeroVolume to volume/sinogram-specific variants --- cuda/2d/algo.cu | 8 ++++---- cuda/2d/astra.cu | 28 ++++++++++++++-------------- cuda/2d/cgls.cu | 14 +++++++------- cuda/2d/em.cu | 18 +++++++++--------- cuda/2d/sart.cu | 27 +++++++++++++++------------ cuda/2d/sirt.cu | 26 +++++++++++++------------- cuda/2d/util.cu | 24 ++++++++++++++++++++++++ cuda/2d/util.h | 7 ++++++- 8 files changed, 92 insertions(+), 60 deletions(-) diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index 71cbfb3..333481a 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -214,11 +214,11 @@ bool ReconAlgo::setMaxConstraint(float fMax) bool ReconAlgo::allocateBuffers() { bool ok; - ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch); + ok = allocateVolumeData(D_volumeData, volumePitch, dims); if (!ok) return false; - ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch); + ok = allocateProjectionData(D_sinoData, sinoPitch, dims); if (!ok) { cudaFree(D_volumeData); D_volumeData = 0; @@ -226,7 +226,7 @@ bool ReconAlgo::allocateBuffers() } if (useVolumeMask) { - ok = allocateVolume(D_maskData, dims.iVolWidth, dims.iVolHeight, maskPitch); + ok = allocateVolumeData(D_maskData, maskPitch, dims); if (!ok) { cudaFree(D_volumeData); cudaFree(D_sinoData); @@ -237,7 +237,7 @@ bool ReconAlgo::allocateBuffers() } if (useSinogramMask) { - ok = allocateVolume(D_smaskData, dims.iProjDets, dims.iProjAngles, smaskPitch); + ok = allocateProjectionData(D_smaskData, smaskPitch, dims); if (!ok) { cudaFree(D_volumeData); cudaFree(D_sinoData); diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index e317f6c..4e69e8f 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -240,13 +240,13 @@ bool AstraFBP::init(int iGPUIndex) } } - bool ok = allocateVolume(pData->D_volumeData, pData->dims.iVolWidth, pData->dims.iVolHeight, pData->volumePitch); + bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); if (!ok) { return false; } - ok = allocateVolume(pData->D_sinoData, pData->dims.iProjDets, pData->dims.iProjAngles, pData->sinoPitch); + ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims); if (!ok) { cudaFree(pData->D_volumeData); @@ -304,7 +304,7 @@ bool AstraFBP::run() return false; } - zeroVolume(pData->D_volumeData, pData->volumePitch, pData->dims.iVolWidth, pData->dims.iVolHeight); + zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); bool ok = false; @@ -604,7 +604,7 @@ bool BPalgo::init() bool BPalgo::iterate(unsigned int) { // TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant - zeroVolume(D_volumeData, volumePitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_volumeData, volumePitch, dims); callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch); return true; } @@ -614,7 +614,7 @@ float BPalgo::computeDiffNorm() float *D_projData; unsigned int projPitch; - allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch); + allocateProjectionData(D_projData, projPitch, dims); cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*dims.iProjDets, dims.iProjAngles, cudaMemcpyDeviceToDevice); callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f); @@ -668,14 +668,14 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram, float* D_volumeData; unsigned int volumePitch; - ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch); + ok = allocateVolumeData(D_volumeData, volumePitch, dims); if (!ok) return false; float* D_sinoData; unsigned int sinoPitch; - ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch); + ok = allocateProjectionData(D_sinoData, sinoPitch, dims); if (!ok) { cudaFree(D_volumeData); return false; @@ -690,7 +690,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram, return false; } - zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_sinoData, sinoPitch, dims); ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, 1.0f); if (!ok) { cudaFree(D_volumeData); @@ -755,14 +755,14 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, float* D_volumeData; unsigned int volumePitch; - ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch); + ok = allocateVolumeData(D_volumeData, volumePitch, dims); if (!ok) return false; float* D_sinoData; unsigned int sinoPitch; - ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch); + ok = allocateProjectionData(D_sinoData, sinoPitch, dims); if (!ok) { cudaFree(D_volumeData); return false; @@ -777,7 +777,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, return false; } - zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_sinoData, sinoPitch, dims); // TODO: Turn this geometry conversion into a util function SFanProjection* projs = new SFanProjection[dims.iProjAngles]; @@ -866,14 +866,14 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, float* D_volumeData; unsigned int volumePitch; - ok = allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch); + ok = allocateVolumeData(D_volumeData, volumePitch, dims); if (!ok) return false; float* D_sinoData; unsigned int sinoPitch; - ok = allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch); + ok = allocateProjectionData(D_sinoData, sinoPitch, dims); if (!ok) { cudaFree(D_volumeData); return false; @@ -888,7 +888,7 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, return false; } - zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_sinoData, sinoPitch, dims); ok = FanFP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, 1.0f); diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index df8db0b..f4175e1 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -73,16 +73,16 @@ void CGLS::reset() bool CGLS::init() { // Lifetime of z: within an iteration - allocateVolume(D_z, dims.iVolWidth, dims.iVolHeight, zPitch); + allocateVolumeData(D_z, zPitch, dims); // Lifetime of p: full algorithm - allocateVolume(D_p, dims.iVolWidth, dims.iVolHeight, pPitch); + allocateVolumeData(D_p, pPitch, dims); // Lifetime of r: full algorithm - allocateVolume(D_r, dims.iProjDets, dims.iProjAngles, rPitch); + allocateProjectionData(D_r, rPitch, dims); // Lifetime of w: within an iteration - allocateVolume(D_w, dims.iProjDets, dims.iProjAngles, wPitch); + allocateProjectionData(D_w, wPitch, dims); // TODO: check if allocations succeeded return true; @@ -134,7 +134,7 @@ bool CGLS::iterate(unsigned int iterations) // p = A'*r - zeroVolume(D_p, pPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_p, pPitch, dims); callBP(D_p, pPitch, D_r, rPitch); if (useVolumeMask) processVol(D_p, D_maskData, pPitch, dims.iVolWidth, dims.iVolHeight); @@ -150,7 +150,7 @@ bool CGLS::iterate(unsigned int iterations) for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) { // w = A*p - zeroVolume(D_w, wPitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_w, wPitch, dims); callFP(D_p, pPitch, D_w, wPitch, 1.0f); // alpha = gamma / @@ -165,7 +165,7 @@ bool CGLS::iterate(unsigned int iterations) // z = A'*r - zeroVolume(D_z, zPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_z, zPitch, dims); callBP(D_z, zPitch, D_r, rPitch); if (useVolumeMask) processVol(D_z, D_maskData, zPitch, dims.iVolWidth, dims.iVolHeight); diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index cf3f10c..b281516 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -73,14 +73,14 @@ void EM::reset() bool EM::init() { - allocateVolume(D_pixelWeight, dims.iVolWidth, dims.iVolHeight, pixelPitch); - zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); + allocateVolumeData(D_pixelWeight, pixelPitch, dims); + zeroVolumeData(D_pixelWeight, pixelPitch, dims); - allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + allocateVolumeData(D_tmpData, tmpPitch, dims); + zeroVolumeData(D_tmpData, tmpPitch, dims); - allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch); - zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); + allocateProjectionData(D_projData, projPitch, dims); + zeroProjectionData(D_projData, projPitch, dims); // We can't precompute pixelWeights when using a volume mask #if 0 @@ -94,7 +94,7 @@ bool EM::init() bool EM::precomputeWeights() { - zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_pixelWeight, pixelPitch, dims); #if 0 if (useSinogramMask) { callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); @@ -129,14 +129,14 @@ bool EM::iterate(unsigned int iterations) for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) { // Do FP of volumeData - zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_projData, projPitch, dims); callFP(D_volumeData, volumePitch, D_projData, projPitch, 1.0f); // Divide sinogram by FP (into projData) processVol(D_projData, D_sinoData, projPitch, dims.iProjDets, dims.iProjAngles); // Do BP of projData into tmpData - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_tmpData, tmpPitch, dims); callBP(D_tmpData, tmpPitch, D_projData, projPitch); // Multiply volumeData with tmpData divided by pixel weights diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 7f499ce..79c00ef 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -105,15 +105,18 @@ void SART::reset() bool SART::init() { if (useVolumeMask) { - allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + allocateVolumeData(D_tmpData, tmpPitch, dims); + zeroVolumeData(D_tmpData, tmpPitch, dims); } - allocateVolume(D_projData, dims.iProjDets, 1, projPitch); - zeroVolume(D_projData, projPitch, dims.iProjDets, 1); + // NB: Non-standard dimensions + SDimensions linedims = dims; + linedims.iProjAngles = 1; + allocateProjectionData(D_projData, projPitch, linedims); + zeroProjectionData(D_projData, projPitch, linedims); - allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch); - zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); + allocateProjectionData(D_lineWeight, linePitch, dims); + zeroProjectionData(D_lineWeight, linePitch, dims); // We can't precompute lineWeights when using a mask if (!useVolumeMask) @@ -138,13 +141,13 @@ bool SART::setProjectionOrder(int* _projectionOrder, int _projectionCount) bool SART::precomputeWeights() { - zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_lineWeight, linePitch, dims); if (useVolumeMask) { callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f); } else { // Allocate tmpData temporarily - allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + allocateVolumeData(D_tmpData, tmpPitch, dims); + zeroVolumeData(D_tmpData, tmpPitch, dims); processVol(D_tmpData, 1.0f, tmpPitch, dims.iVolWidth, dims.iVolHeight); @@ -193,7 +196,7 @@ bool SART::iterate(unsigned int iterations) if (useVolumeMask) { // BP, mask, and add back // TODO: Try putting the masking directly in the BP - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_tmpData, tmpPitch, dims); callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle); processVol(D_volumeData, D_maskData, D_tmpData, volumePitch, dims.iVolWidth, dims.iVolHeight); } else { @@ -216,8 +219,8 @@ float SART::computeDiffNorm() { unsigned int pPitch; float *D_p; - allocateVolume(D_p, dims.iProjDets, dims.iProjAngles, pPitch); - zeroVolume(D_p, pPitch, dims.iProjDets, dims.iProjAngles); + allocateProjectionData(D_p, pPitch, dims); + zeroProjectionData(D_p, pPitch, dims); // copy sinogram to D_p cudaMemcpy2D(D_p, sizeof(float)*pPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index eb65962..1b0891a 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -88,17 +88,17 @@ void SIRT::reset() bool SIRT::init() { - allocateVolume(D_pixelWeight, dims.iVolWidth, dims.iVolHeight, pixelPitch); - zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); + allocateVolumeData(D_pixelWeight, pixelPitch, dims); + zeroVolumeData(D_pixelWeight, pixelPitch, dims); - allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + allocateVolumeData(D_tmpData, tmpPitch, dims); + zeroVolumeData(D_tmpData, tmpPitch, dims); - allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch); - zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); + allocateProjectionData(D_projData, projPitch, dims); + zeroProjectionData(D_projData, projPitch, dims); - allocateVolume(D_lineWeight, dims.iProjDets, dims.iProjAngles, linePitch); - zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); + allocateProjectionData(D_lineWeight, linePitch, dims); + zeroProjectionData(D_lineWeight, linePitch, dims); // We can't precompute lineWeights and pixelWeights when using a mask if (!useVolumeMask && !useSinogramMask) @@ -110,7 +110,7 @@ bool SIRT::init() bool SIRT::precomputeWeights() { - zeroVolume(D_lineWeight, linePitch, dims.iProjDets, dims.iProjAngles); + zeroProjectionData(D_lineWeight, linePitch, dims); if (useVolumeMask) { callFP(D_maskData, maskPitch, D_lineWeight, linePitch, 1.0f); } else { @@ -125,7 +125,7 @@ bool SIRT::precomputeWeights() } - zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_pixelWeight, pixelPitch, dims); if (useSinogramMask) { callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); } else { @@ -160,7 +160,7 @@ bool SIRT::uploadMinMaxMasks(const float* pfMinMaskData, const float* pfMaxMaskD freeMinMaxMasks = true; bool ok = true; if (pfMinMaskData) { - allocateVolume(D_minMaskData, dims.iVolWidth, dims.iVolHeight, minMaskPitch); + allocateVolumeData(D_minMaskData, minMaskPitch, dims); ok = copyVolumeToDevice(pfMinMaskData, iPitch, dims.iVolWidth, dims.iVolHeight, D_minMaskData, minMaskPitch); @@ -169,7 +169,7 @@ bool SIRT::uploadMinMaxMasks(const float* pfMinMaskData, const float* pfMaxMaskD return false; if (pfMaxMaskData) { - allocateVolume(D_maxMaskData, dims.iVolWidth, dims.iVolHeight, maxMaskPitch); + allocateVolumeData(D_maxMaskData, maxMaskPitch, dims); ok = copyVolumeToDevice(pfMaxMaskData, iPitch, dims.iVolWidth, dims.iVolHeight, D_maxMaskData, maxMaskPitch); @@ -204,7 +204,7 @@ bool SIRT::iterate(unsigned int iterations) processVol(D_projData, D_lineWeight, projPitch, dims.iProjDets, dims.iProjAngles); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + zeroVolumeData(D_tmpData, tmpPitch, dims); callBP(D_tmpData, tmpPitch, D_projData, projPitch); diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu index 8bb2f2f..d5cbe44 100644 --- a/cuda/2d/util.cu +++ b/cuda/2d/util.cu @@ -97,6 +97,30 @@ void zeroVolume(float* data, unsigned int pitch, unsigned int width, unsigned in ASTRA_CUDA_ASSERT(err); } +bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims) +{ + // TODO: memory order + return allocateVolume(D_ptr, dims.iVolWidth, dims.iVolHeight, pitch); +} + +bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims) +{ + // TODO: memory order + return allocateVolume(D_ptr, dims.iProjDets, dims.iProjAngles, pitch); +} + +void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims) +{ + // TODO: memory order + zeroVolume(D_ptr, pitch, dims.iVolWidth, dims.iVolHeight); +} + +void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims) +{ + // TODO: memory order + zeroVolume(D_ptr, pitch, dims.iProjDets, dims.iProjAngles); +} + template __global__ void reduce1D(float *g_idata, float *g_odata, unsigned int n) diff --git a/cuda/2d/util.h b/cuda/2d/util.h index ed4a45c..3cffa08 100644 --- a/cuda/2d/util.h +++ b/cuda/2d/util.h @@ -73,9 +73,14 @@ bool copySinogramToDevice(const float* in_data, unsigned int in_pitch, float* outD_data, unsigned int out_pitch); bool allocateVolume(float*& D_ptr, unsigned int width, unsigned int height, unsigned int& pitch); - void zeroVolume(float* D_data, unsigned int pitch, unsigned int width, unsigned int height); +bool allocateVolumeData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims); +bool allocateProjectionData(float*& D_ptr, unsigned int& pitch, const SDimensions& dims); +void zeroVolumeData(float* D_ptr, unsigned int pitch, const SDimensions& dims); +void zeroProjectionData(float* D_ptr, unsigned int pitch, const SDimensions& dims); + + bool cudaTextForceKernelsCompletion(); void reportCudaError(cudaError_t err); -- cgit v1.2.3