From a70ad8df8fc2a3da63fc91dd18bbfd55be7a89dd Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 12 Mar 2015 12:30:47 +0100 Subject: Add outputScale argument to 3D CUDA BP --- cuda/3d/cone_bp.cu | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'cuda/3d/cone_bp.cu') diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 5648d6f..5e67980 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -78,7 +78,8 @@ bool bindProjDataTexture(const cudaArray* array) //__launch_bounds__(32*16, 4) __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, - int angleOffset, const astraCUDA3d::SDimensions3D dims) + int angleOffset, const astraCUDA3d::SDimensions3D dims, + float fOutputScale) { float* volData = (float*)D_volData; @@ -147,13 +148,13 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng endZ = dims.iVolZ - startZ; for(int i=0; i < endZ; i++) - volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i]; + volData[((startZ+i)*dims.iVolY+Y)*volPitch+X] += Z[i] * fOutputScale; } //End kernel // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -189,6 +190,9 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + + for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) { @@ -236,14 +240,15 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start } - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal / (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal * fOutputScale; } } bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, - const SDimensions3D& dims, const SConeProjection* angles) + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale) { bindProjDataTexture(D_projArray); @@ -291,9 +296,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (dims.iRaysPerVoxelDim == 1) - dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims); + dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -309,14 +314,15 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - const SDimensions3D& dims, const SConeProjection* angles) + const SDimensions3D& dims, const SConeProjection* angles, + float fOutputScale) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles); + bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); cudaFreeArray(cuArray); -- cgit v1.2.3