diff options
| -rw-r--r-- | cuda/3d/par3d_fp.cu | 545 | 
1 files changed, 281 insertions, 264 deletions
| diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index e0f743c..e85effc 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -86,197 +86,223 @@ static bool bindVolumeDataTexture(const cudaArray* array)  } +// x=0, y=1, z=2 +struct DIR_X { +	__device__ float nSlices(const SDimensions3D& dims) const { return dims.iVolX; } +	__device__ float nDim1(const SDimensions3D& dims) const { return dims.iVolY; } +	__device__ float nDim2(const SDimensions3D& dims) const { return dims.iVolZ; } +	__device__ float c0(float x, float y, float z) const { return x; } +	__device__ float c1(float x, float y, float z) const { return y; } +	__device__ float c2(float x, float y, float z) const { return z; } +	__device__ float tex(float f0, float f1, float f2) const { return tex3D(gT_par3DVolumeTexture, f0, f1, f2); } +	__device__ float x(float f0, float f1, float f2) const { return f0; } +	__device__ float y(float f0, float f1, float f2) const { return f1; } +	__device__ float z(float f0, float f1, float f2) const { return f2; } +}; + +// y=0, x=1, z=2 +struct DIR_Y { +	__device__ float nSlices(const SDimensions3D& dims) const { return dims.iVolY; } +	__device__ float nDim1(const SDimensions3D& dims) const { return dims.iVolX; } +	__device__ float nDim2(const SDimensions3D& dims) const { return dims.iVolZ; } +	__device__ float c0(float x, float y, float z) const { return y; } +	__device__ float c1(float x, float y, float z) const { return x; } +	__device__ float c2(float x, float y, float z) const { return z; } +	__device__ float tex(float f0, float f1, float f2) const { return tex3D(gT_par3DVolumeTexture, f1, f0, f2); } +	__device__ float x(float f0, float f1, float f2) const { return f1; } +	__device__ float y(float f0, float f1, float f2) const { return f0; } +	__device__ float z(float f0, float f1, float f2) const { return f2; } +}; + +// z=0, x=1, y=2 +struct DIR_Z { +	__device__ float nSlices(const SDimensions3D& dims) const { return dims.iVolZ; } +	__device__ float nDim1(const SDimensions3D& dims) const { return dims.iVolX; } +	__device__ float nDim2(const SDimensions3D& dims) const { return dims.iVolY; } +	__device__ float c0(float x, float y, float z) const { return z; } +	__device__ float c1(float x, float y, float z) const { return x; } +	__device__ float c2(float x, float y, float z) const { return y; } +	__device__ float tex(float f0, float f1, float f2) const { return tex3D(gT_par3DVolumeTexture, f1, f2, f0); } +	__device__ float x(float f0, float f1, float f2) const { return f1; } +	__device__ float y(float f0, float f1, float f2) const { return f2; } +	__device__ float z(float f0, float f1, float f2) const { return f0; } +}; + +  // threadIdx: x = u detector  //            y = relative angle  // blockIdx:  x = u/v detector  //            y = angle block -#define PAR3D_FP_BODY(c0,c1,c2) \ -	int angle = startAngle + blockIdx.y * g_anglesPerBlock + threadIdx.y;                                        \ -	if (angle >= endAngle)                                                                                       \ -		return;                                                                                                  \ -                                                                                                                 \ -	const float fRayX = gC_RayX[angle];                                                                          \ -	const float fRayY = gC_RayY[angle];                                                                          \ -	const float fRayZ = gC_RayZ[angle];                                                                          \ -	const float fDetUX = gC_DetUX[angle];                                                                        \ -	const float fDetUY = gC_DetUY[angle];                                                                        \ -	const float fDetUZ = gC_DetUZ[angle];                                                                        \ -	const float fDetVX = gC_DetVX[angle];                                                                        \ -	const float fDetVY = gC_DetVY[angle];                                                                        \ -	const float fDetVZ = gC_DetVZ[angle];                                                                        \ -	const float fDetSX = gC_DetSX[angle] + 0.5f * fDetUX + 0.5f * fDetVX;                                        \ -	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;                                        \ -	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;                                        \ -                                                                                                                 \ -                                                                                                                 \ -                                                                                                                 \ -	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;    \ -	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;             \ -	int endDetectorV = startDetectorV + g_detBlockV;                                                             \ -	if (endDetectorV > dims.iProjV)                                                                              \ -		endDetectorV = dims.iProjV;                                                                              \ -                                                                                                                 \ -	int endSlice = startSlice + g_blockSlices;                                                                   \ -	if (endSlice > dims.iVol##c0)                                                                                \ -		endSlice = dims.iVol##c0;                                                                                \ -                                                                                                                 \ -	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)                                  \ -	{                                                                                                            \ -		/* Trace ray in direction Ray to (detectorU,detectorV) from  */                                          \ -		/* X = startSlice to X = endSlice                            */                                          \ -                                                                                                                 \ -		const float fDetX = fDetSX + detectorU*fDetUX + detectorV*fDetVX;                                        \ -		const float fDetY = fDetSY + detectorU*fDetUY + detectorV*fDetVY;                                        \ -		const float fDetZ = fDetSZ + detectorU*fDetUZ + detectorV*fDetVZ;                                        \ -                                                                                                                 \ -		/*        (x)   ( 1)       ( 0)    */                                                                    \ -		/* ray:   (y) = (ay) * x + (by)    */                                                                    \ -		/*        (z)   (az)       (bz)    */                                                                    \ -                                                                                                                 \ -		const float a##c1 = fRay##c1 / fRay##c0;                                                                 \ -		const float a##c2 = fRay##c2 / fRay##c0;                                                                 \ -		const float b##c1 = fDet##c1 - a##c1 * fDet##c0;                                                         \ -		const float b##c2 = fDet##c2 - a##c2 * fDet##c0;                                                         \ -                                                                                                                 \ -		const float fDistCorr = sqrt(a##c1*a##c1+a##c2*a##c2+1.0f) * fOutputScale;                               \ -                                                                                                                 \ -		float fVal = 0.0f;                                                                                       \ -                                                                                                                 \ -		float f##c0 = startSlice + 0.5f;                                                                         \ -		float f##c1 = a##c1 * (startSlice - 0.5f*dims.iVol##c0 + 0.5f) + b##c1 + 0.5f*dims.iVol##c1 - 0.5f + 0.5f;\ -		float f##c2 = a##c2 * (startSlice - 0.5f*dims.iVol##c0 + 0.5f) + b##c2 + 0.5f*dims.iVol##c2 - 0.5f + 0.5f;\ -                                                                                                                 \ -		for (int s = startSlice; s < endSlice; ++s)                                                              \ -		{                                                                                                        \ -			fVal += tex3D(gT_par3DVolumeTexture, fX, fY, fZ);                                                    \ -			f##c0 += 1.0f;                                                                                       \ -			f##c1 += a##c1;                                                                                      \ -			f##c2 += a##c2;                                                                                      \ -		}                                                                                                        \ -                                                                                                                 \ -		fVal *= fDistCorr;                                                                                       \ -                                                                                                                 \ -		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal;                              \ -	} +template<class COORD> +__global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, +                           unsigned int startSlice, +                           unsigned int startAngle, unsigned int endAngle, +                           const SDimensions3D dims, float fOutputScale) +{ +	COORD c; + +	int angle = startAngle + blockIdx.y * g_anglesPerBlock + threadIdx.y; +	if (angle >= endAngle) +		return; + +	const float fRayX = gC_RayX[angle]; +	const float fRayY = gC_RayY[angle]; +	const float fRayZ = gC_RayZ[angle]; +	const float fDetUX = gC_DetUX[angle]; +	const float fDetUY = gC_DetUY[angle]; +	const float fDetUZ = gC_DetUZ[angle]; +	const float fDetVX = gC_DetVX[angle]; +	const float fDetVY = gC_DetVY[angle]; +	const float fDetVZ = gC_DetVZ[angle]; +	const float fDetSX = gC_DetSX[angle] + 0.5f * fDetUX + 0.5f * fDetVX; +	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; +	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + + + +	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; +	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; +	int endDetectorV = startDetectorV + g_detBlockV; +	if (endDetectorV > dims.iProjV) +		endDetectorV = dims.iProjV; + +	int endSlice = startSlice + g_blockSlices; +	if (endSlice > c.nSlices(dims)) +		endSlice = c.nSlices(dims); + +	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) +	{ +		/* Trace ray in direction Ray to (detectorU,detectorV) from  */ +		/* X = startSlice to X = endSlice                            */ + +		const float fDetX = fDetSX + detectorU*fDetUX + detectorV*fDetVX; +		const float fDetY = fDetSY + detectorU*fDetUY + detectorV*fDetVY; +		const float fDetZ = fDetSZ + detectorU*fDetUZ + detectorV*fDetVZ; + +		/*        (x)   ( 1)       ( 0)    */ +		/* ray:   (y) = (ay) * x + (by)    */ +		/*        (z)   (az)       (bz)    */ + +		const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +		const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +		const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); +		const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); + +		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + +		float fVal = 0.0f; + +		float f0 = startSlice + 0.5f; +		float f1 = a1 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b1 + 0.5f*c.nDim1(dims) - 0.5f + 0.5f; +		float f2 = a2 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b2 + 0.5f*c.nDim2(dims) - 0.5f + 0.5f; + +		for (int s = startSlice; s < endSlice; ++s) +		{ +			fVal += c.tex(f0, f1, f2); +			f0 += 1.0f; +			f1 += a1; +			f2 += a2; +		} +		fVal *= fDistCorr; -// Supersampling version -#define PAR3D_FP_SS_BODY(c0,c1,c2) \ -	int angle = startAngle + blockIdx.y * g_anglesPerBlock + threadIdx.y;                                        \ -	if (angle >= endAngle)                                                                                       \ -		return;                                                                                                  \ -                                                                                                                 \ -	const float fRayX = gC_RayX[angle];                                                                          \ -	const float fRayY = gC_RayY[angle];                                                                          \ -	const float fRayZ = gC_RayZ[angle];                                                                          \ -	const float fDetUX = gC_DetUX[angle];                                                                        \ -	const float fDetUY = gC_DetUY[angle];                                                                        \ -	const float fDetUZ = gC_DetUZ[angle];                                                                        \ -	const float fDetVX = gC_DetVX[angle];                                                                        \ -	const float fDetVY = gC_DetVY[angle];                                                                        \ -	const float fDetVZ = gC_DetVZ[angle];                                                                        \ -	const float fDetSX = gC_DetSX[angle] + 0.5f * fDetUX + 0.5f * fDetVX;                                        \ -	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;                                        \ -	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;                                        \ -                                                                                                                 \ -                                                                                                                 \ -                                                                                                                 \ -	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;    \ -	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;             \ -	int endDetectorV = startDetectorV + g_detBlockV;                                                             \ -	if (endDetectorV > dims.iProjV)                                                                              \ -		endDetectorV = dims.iProjV;                                                                              \ -                                                                                                                 \ -	int endSlice = startSlice + g_blockSlices;                                                                   \ -	if (endSlice > dims.iVol##c0)                                                                                \ -		endSlice = dims.iVol##c0;                                                                                \ -                                                                                                                 \ -	const float fSubStep = 1.0f/dims.iRaysPerDetDim;                                                             \ -                                                                                                                 \ -	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)                                  \ -	{                                                                                                            \ -                                                                                                                 \ -		float fV = 0.0f;                                                                                         \ -                                                                                                                 \ -		float fdU = detectorU - 0.5f + 0.5f*fSubStep;                                                            \ -		for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {                               \ -		float fdV = detectorV - 0.5f + 0.5f*fSubStep;                                                            \ -		for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {                               \ -                                                                                                                 \ -		/* Trace ray in direction Ray to (detectorU,detectorV) from  */                                          \ -		/* X = startSlice to X = endSlice                            */                                          \ -                                                                                                                 \ -		const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX;                                                    \ -		const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY;                                                    \ -		const float fDetZ = fDetSZ + fdU*fDetUZ + fdV*fDetVZ;                                                    \ -                                                                                                                 \ -		/*        (x)   ( 1)       ( 0)    */                                                                    \ -		/* ray:   (y) = (ay) * x + (by)    */                                                                    \ -		/*        (z)   (az)       (bz)    */                                                                    \ -                                                                                                                 \ -		const float a##c1 = fRay##c1 / fRay##c0;                                                                 \ -		const float a##c2 = fRay##c2 / fRay##c0;                                                                 \ -		const float b##c1 = fDet##c1 - a##c1 * fDet##c0;                                                         \ -		const float b##c2 = fDet##c2 - a##c2 * fDet##c0;                                                         \ -                                                                                                                 \ -		const float fDistCorr = sqrt(a##c1*a##c1+a##c2*a##c2+1.0f) * fOutputScale;                               \ -                                                                                                                 \ -		float fVal = 0.0f;                                                                                       \ -                                                                                                                 \ -		float f##c0 = startSlice + 0.5f;                                                                         \ -		float f##c1 = a##c1 * (startSlice - 0.5f*dims.iVol##c0 + 0.5f) + b##c1 + 0.5f*dims.iVol##c1 - 0.5f + 0.5f;\ -		float f##c2 = a##c2 * (startSlice - 0.5f*dims.iVol##c0 + 0.5f) + b##c2 + 0.5f*dims.iVol##c2 - 0.5f + 0.5f;\ -                                                                                                                 \ -		for (int s = startSlice; s < endSlice; ++s)                                                              \ -		{                                                                                                        \ -			fVal += tex3D(gT_par3DVolumeTexture, fX, fY, fZ);                                                    \ -			f##c0 += 1.0f;                                                                                       \ -			f##c1 += a##c1;                                                                                      \ -			f##c2 += a##c2;                                                                                      \ -		}                                                                                                        \ -                                                                                                                 \ -		fVal *= fDistCorr;                                                                                       \ -		fV += fVal;                                                                                              \ -                                                                                                                 \ -		}                                                                                                        \ -		}                                                                                                        \ -                                                                                                                 \ -		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim);\ +		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal;  	} +} +// Supersampling version +template<class COORD> +__global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, +                              unsigned int startSlice, +                              unsigned int startAngle, unsigned int endAngle, +                              const SDimensions3D dims, float fOutputScale) +{ +	COORD c; +	int angle = startAngle + blockIdx.y * g_anglesPerBlock + threadIdx.y; +	if (angle >= endAngle) +		return; -__global__ void par3D_FP_dirX(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_BODY(X,Y,Z) -} +	const float fRayX = gC_RayX[angle]; +	const float fRayY = gC_RayY[angle]; +	const float fRayZ = gC_RayZ[angle]; +	const float fDetUX = gC_DetUX[angle]; +	const float fDetUY = gC_DetUY[angle]; +	const float fDetUZ = gC_DetUZ[angle]; +	const float fDetVX = gC_DetVX[angle]; +	const float fDetVY = gC_DetVY[angle]; +	const float fDetVZ = gC_DetVZ[angle]; +	const float fDetSX = gC_DetSX[angle] + 0.5f * fDetUX + 0.5f * fDetVX; +	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; +	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; -__global__ void par3D_FP_dirY(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_BODY(Y,X,Z) -} -__global__ void par3D_FP_dirZ(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_BODY(Z,X,Y) -} -__global__ void par3D_FP_SS_dirX(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_SS_BODY(X,Y,Z) -} +	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; +	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; +	int endDetectorV = startDetectorV + g_detBlockV; +	if (endDetectorV > dims.iProjV) +		endDetectorV = dims.iProjV; -__global__ void par3D_FP_SS_dirY(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_SS_BODY(Y,X,Z) -} +	int endSlice = startSlice + g_blockSlices; +	if (endSlice > c.nSlices(dims)) +		endSlice = c.nSlices(dims); -__global__ void par3D_FP_SS_dirZ(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_SS_BODY(Z,X,Y) +	const float fSubStep = 1.0f/dims.iRaysPerDetDim; + +	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) +	{ + +		float fV = 0.0f; + +		float fdU = detectorU - 0.5f + 0.5f*fSubStep; +		for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { +		float fdV = detectorV - 0.5f + 0.5f*fSubStep; +		for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { + +		/* Trace ray in direction Ray to (detectorU,detectorV) from  */ +		/* X = startSlice to X = endSlice                            */ + +		const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX; +		const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY; +		const float fDetZ = fDetSZ + fdU*fDetUZ + fdV*fDetVZ; + +		/*        (x)   ( 1)       ( 0)    */ +		/* ray:   (y) = (ay) * x + (by)    */ +		/*        (z)   (az)       (bz)    */ + +		const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +		const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +		const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); +		const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); + +		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + +		float fVal = 0.0f; + +		float f0 = startSlice + 0.5f; +		float f1 = a1 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b1 + 0.5f*c.nDim1(dims) - 0.5f + 0.5f; +		float f2 = a2 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b2 + 0.5f*c.nDim2(dims) - 0.5f + 0.5f; + +		for (int s = startSlice; s < endSlice; ++s) +		{ +			fVal += c.tex(f0, f1, f2); +			f0 += 1.0f; +			f1 += a1; +			f2 += a2; +		} + +		fVal *= fDistCorr; +		fV += fVal; + +		} +		} + +		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); +	}  } @@ -294,92 +320,83 @@ __device__ float dirWeights(float fX, float fN) {  	return 0.0f; // outside image on right  } -#define PAR3D_FP_SUMSQW_BODY(c0,c1,c2) \ -	int angle = startAngle + blockIdx.y * g_anglesPerBlock + threadIdx.y;                                        \ -	if (angle >= endAngle)                                                                                       \ -		return;                                                                                                  \ -                                                                                                                 \ -	const float fRayX = gC_RayX[angle];                                                                          \ -	const float fRayY = gC_RayY[angle];                                                                          \ -	const float fRayZ = gC_RayZ[angle];                                                                          \ -	const float fDetUX = gC_DetUX[angle];                                                                        \ -	const float fDetUY = gC_DetUY[angle];                                                                        \ -	const float fDetUZ = gC_DetUZ[angle];                                                                        \ -	const float fDetVX = gC_DetVX[angle];                                                                        \ -	const float fDetVY = gC_DetVY[angle];                                                                        \ -	const float fDetVZ = gC_DetVZ[angle];                                                                        \ -	const float fDetSX = gC_DetSX[angle] + 0.5f * fDetUX + 0.5f * fDetVX;                                        \ -	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY;                                        \ -	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;                                        \ -                                                                                                                 \ -                                                                                                                 \ -                                                                                                                 \ -	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;    \ -	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;             \ -	int endDetectorV = startDetectorV + g_detBlockV;                                                             \ -	if (endDetectorV > dims.iProjV)                                                                              \ -		endDetectorV = dims.iProjV;                                                                              \ -                                                                                                                 \ -	int endSlice = startSlice + g_blockSlices;                                                                   \ -	if (endSlice > dims.iVol##c0)                                                                                \ -		endSlice = dims.iVol##c0;                                                                                \ -                                                                                                                 \ -	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)                                  \ -	{                                                                                                            \ -		/* Trace ray in direction Ray to (detectorU,detectorV) from  */                                          \ -		/* X = startSlice to X = endSlice                            */                                          \ -                                                                                                                 \ -		const float fDetX = fDetSX + detectorU*fDetUX + detectorV*fDetVX;                                        \ -		const float fDetY = fDetSY + detectorU*fDetUY + detectorV*fDetVY;                                        \ -		const float fDetZ = fDetSZ + detectorU*fDetUZ + detectorV*fDetVZ;                                        \ -                                                                                                                 \ -		/*        (x)   ( 1)       ( 0)    */                                                                    \ -		/* ray:   (y) = (ay) * x + (by)    */                                                                    \ -		/*        (z)   (az)       (bz)    */                                                                    \ -                                                                                                                 \ -		const float a##c1 = fRay##c1 / fRay##c0;                                                                 \ -		const float a##c2 = fRay##c2 / fRay##c0;                                                                 \ -		const float b##c1 = fDet##c1 - a##c1 * fDet##c0;                                                         \ -		const float b##c2 = fDet##c2 - a##c2 * fDet##c0;                                                         \ -                                                                                                                 \ -		const float fDistCorr = sqrt(a##c1*a##c1+a##c2*a##c2+1.0f) * fOutputScale;                               \ -                                                                                                                 \ -		float fVal = 0.0f;                                                                                       \ -                                                                                                                 \ -		float f##c0 = startSlice + 0.5f;                                                                         \ -		float f##c1 = a##c1 * (startSlice - 0.5f*dims.iVol##c0 + 0.5f) + b##c1 + 0.5f*dims.iVol##c1 - 0.5f + 0.5f;\ -		float f##c2 = a##c2 * (startSlice - 0.5f*dims.iVol##c0 + 0.5f) + b##c2 + 0.5f*dims.iVol##c2 - 0.5f + 0.5f;\ -                                                                                                                 \ -		for (int s = startSlice; s < endSlice; ++s)                                                              \ -		{                                                                                                        \ -			fVal += dirWeights(f##c1, dims.iVol##c1) * dirWeights(f##c2, dims.iVol##c2) * fDistCorr * fDistCorr; \ -			f##c0 += 1.0f;                                                                                       \ -			f##c1 += a##c1;                                                                                      \ -			f##c2 += a##c2;                                                                                      \ -		}                                                                                                        \ -                                                                                                                 \ -		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal;                              \ -	} - -// Supersampling version -// TODO - - -__global__ void par3D_FP_SumSqW_dirX(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_SUMSQW_BODY(X,Y,Z) -} - -__global__ void par3D_FP_SumSqW_dirY(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) +template<class COORD> +__global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, +                                  unsigned int startSlice, +                                  unsigned int startAngle, unsigned int endAngle, +                                  const SDimensions3D dims, float fOutputScale)  { -PAR3D_FP_SUMSQW_BODY(Y,X,Z) -} +	COORD c; + +	int angle = startAngle + blockIdx.y * g_anglesPerBlock + threadIdx.y; +	if (angle >= endAngle) +		return; + +	const float fRayX = gC_RayX[angle]; +	const float fRayY = gC_RayY[angle]; +	const float fRayZ = gC_RayZ[angle]; +	const float fDetUX = gC_DetUX[angle]; +	const float fDetUY = gC_DetUY[angle]; +	const float fDetUZ = gC_DetUZ[angle]; +	const float fDetVX = gC_DetVX[angle]; +	const float fDetVY = gC_DetVY[angle]; +	const float fDetVZ = gC_DetVZ[angle]; +	const float fDetSX = gC_DetSX[angle] + 0.5f * fDetUX + 0.5f * fDetVX; +	const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; +	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + + + +	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; +	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; +	int endDetectorV = startDetectorV + g_detBlockV; +	if (endDetectorV > dims.iProjV) +		endDetectorV = dims.iProjV; + +	int endSlice = startSlice + g_blockSlices; +	if (endSlice > c.nSlices(dims)) +		endSlice = c.nSlices(dims); + +	for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) +	{ +		/* Trace ray in direction Ray to (detectorU,detectorV) from  */ +		/* X = startSlice to X = endSlice                            */ + +		const float fDetX = fDetSX + detectorU*fDetUX + detectorV*fDetVX; +		const float fDetY = fDetSY + detectorU*fDetUY + detectorV*fDetVY; +		const float fDetZ = fDetSZ + detectorU*fDetUZ + detectorV*fDetVZ; + +		/*        (x)   ( 1)       ( 0)    */ +		/* ray:   (y) = (ay) * x + (by)    */ +		/*        (z)   (az)       (bz)    */ + +		const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +		const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); +		const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); +		const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); + +		const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + +		float fVal = 0.0f; + +		float f0 = startSlice + 0.5f; +		float f1 = a1 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b1 + 0.5f*c.nDim1(dims) - 0.5f + 0.5f; +		float f2 = a2 * (startSlice - 0.5f*c.nSlices(dims) + 0.5f) + b2 + 0.5f*c.nDim2(dims) - 0.5f + 0.5f; + +		for (int s = startSlice; s < endSlice; ++s) +		{ +			fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr; +			f0 += 1.0f; +			f1 += a1; +			f2 += a2; +		} -__global__ void par3D_FP_SumSqW_dirZ(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, float fOutputScale) -{ -PAR3D_FP_SUMSQW_BODY(Z,X,Y) +		D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal; +	}  } +// Supersampling version +// TODO  bool Par3DFP_Array(cudaArray *D_volArray, @@ -462,21 +479,21 @@ bool Par3DFP_Array(cudaArray *D_volArray,  				if (blockDirection == 0) {  					for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)  						if (dims.iRaysPerDetDim == 1) -							par3D_FP_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  						else -							par3D_FP_SS_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  				} else if (blockDirection == 1) {  					for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)  						if (dims.iRaysPerDetDim == 1) -							par3D_FP_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  						else -							par3D_FP_SS_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  				} else if (blockDirection == 2) {  					for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)  						if (dims.iRaysPerDetDim == 1) -							par3D_FP_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  						else -							par3D_FP_SS_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  				}  			} @@ -593,7 +610,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  				if (blockDirection == 0) {  					for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)  						if (dims.iRaysPerDetDim == 1) -							par3D_FP_SumSqW_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  						else  #if 0  							par3D_FP_SS_SumSqW_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -603,7 +620,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  				} else if (blockDirection == 1) {  					for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)  						if (dims.iRaysPerDetDim == 1) -							par3D_FP_SumSqW_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  						else  #if 0  							par3D_FP_SS_SumSqW_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -613,7 +630,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,  				} else if (blockDirection == 2) {  					for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)  						if (dims.iRaysPerDetDim == 1) -							par3D_FP_SumSqW_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); +							par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);  						else  #if 0  							par3D_FP_SS_SumSqW_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); | 
