diff options
| -rw-r--r-- | cuda/2d/algo.cu | 12 | ||||
| -rw-r--r-- | cuda/2d/fan_bp.cu | 62 | ||||
| -rw-r--r-- | cuda/2d/par_bp.cu | 20 | ||||
| -rw-r--r-- | cuda/3d/cone_bp.cu | 6 | ||||
| -rw-r--r-- | src/CudaReconstructionAlgorithm2D.cpp | 3 | ||||
| -rw-r--r-- | tests/python/test_line2d.py | 59 | 
6 files changed, 132 insertions, 30 deletions
| diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index b4c2864..11422ff 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -258,10 +258,10 @@ bool ReconAlgo::copyDataToGPU(const float* pfSinogram, unsigned int iSinogramPit  	if (!ok)  		return false; -	// rescale sinogram to adjust for pixel size -	processSino<opMul>(D_sinoData, fSinogramScale, -	                       //1.0f/(fPixelSize*fPixelSize), -	                       sinoPitch, dims); +	// rescale sinogram +	if (fSinogramScale != 1.0f) +		processSino<opMul>(D_sinoData, fSinogramScale, +		                   sinoPitch, dims);  	ok = copyVolumeToDevice(pfReconstruction, iReconstructionPitch,  	                        dims, @@ -331,11 +331,11 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,  	if (parProjs) {  		assert(!fanProjs);  		return BP(D_volumeData, volumePitch, D_projData, projPitch, -		          dims, parProjs, outputScale); +		          dims, parProjs, fOutputScale * outputScale);  	} else {  		assert(fanProjs);  		return FanBP(D_volumeData, volumePitch, D_projData, projPitch, -		             dims, fanProjs, outputScale); +		             dims, fanProjs, fOutputScale * outputScale);  	}  } diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index dac3ac2..2072cd4 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -48,7 +48,7 @@ const unsigned int g_anglesPerBlock = 16;  const unsigned int g_blockSliceSize = 32;  const unsigned int g_blockSlices = 16; -const unsigned int g_MaxAngles = 2560; +const unsigned int g_MaxAngles = 2240;  __constant__ float gC_SrcX[g_MaxAngles];  __constant__ float gC_SrcY[g_MaxAngles]; @@ -56,6 +56,7 @@ __constant__ float gC_DetSX[g_MaxAngles];  __constant__ float gC_DetSY[g_MaxAngles];  __constant__ float gC_DetUX[g_MaxAngles];  __constant__ float gC_DetUY[g_MaxAngles]; +__constant__ float gC_Scale[g_MaxAngles];  static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder) @@ -96,8 +97,6 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	// TODO: Distance correction? -  	for (int angle = startAngle; angle < endAngle; ++angle)  	{  		const float fSrcX = gC_SrcX[angle]; @@ -106,15 +105,24 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s  		const float fDetSY = gC_DetSY[angle];  		const float fDetUX = gC_DetUX[angle];  		const float fDetUY = gC_DetUY[angle]; +		const float fScale = gC_Scale[angle];  		const float fXD = fSrcX - fX;  		const float fYD = fSrcY - fY;  		const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;  		const float fDen = fDetUX * fYD - fDetUY * fXD; -		 -		const float fT = fNum / fDen; -		fVal += tex2D(gT_FanProjTexture, fT, fA); + +		// fDen = || u (x-s) ||     (2x2 determinant) + +		// Scale factor is the approximate number of rays traversing this pixel, +		// given by the inverse size of a detector pixel scaled by the magnification +		// factor of this pixel. +		// Magnification factor is || u (d-s) || / || u (x-s) || + +		const float fr = __fdividef(1.0f, fDen); +		const float fT = fNum * fr; +		fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;  		fA += 1.0f;  	} @@ -148,8 +156,6 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	// TODO: Distance correction? -  	for (int angle = startAngle; angle < endAngle; ++angle)  	{  		const float fSrcX = gC_SrcX[angle]; @@ -158,6 +164,7 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in  		const float fDetSY = gC_DetSY[angle];  		const float fDetUX = gC_DetUX[angle];  		const float fDetUY = gC_DetUY[angle]; +		const float fScale = gC_Scale[angle];  		// TODO: Optimize these loops...  		float fX = fXb; @@ -169,9 +176,10 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in  				const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;  				const float fDen = fDetUX * fYD - fDetUY * fXD; -		 -				const float fT = fNum / fDen; -				fVal += tex2D(gT_FanProjTexture, fT, fA); +				const float fr = __fdividef(1.0f, fDen); + +				const float fT = fNum * fr; +				fVal += tex2D(gT_FanProjTexture, fT, fA) * fScale * fr;  				fY -= fSubStep;  			}  			fX += fSubStep; @@ -202,8 +210,6 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi  	float* volData = (float*)D_volData; -	// TODO: Distance correction? -  	// TODO: Constant memory vs parameters.  	const float fSrcX = gC_SrcX[0];  	const float fSrcY = gC_SrcY[0]; @@ -211,15 +217,17 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi  	const float fDetSY = gC_DetSY[0];  	const float fDetUX = gC_DetUX[0];  	const float fDetUY = gC_DetUY[0]; +	const float fScale = gC_Scale[0];  	const float fXD = fSrcX - fX;  	const float fYD = fSrcY - fY;  	const float fNum = fDetSY * fXD - fDetSX * fYD + fX*fSrcY - fY*fSrcX;  	const float fDen = fDetUX * fYD - fDetUY * fXD; -		 -	const float fT = fNum / fDen; -	const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f); +	const float fr = __fdividef(1.0f, fDen); + +	const float fT = fNum * fr; +	const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f) * fScale * fr;  	volData[Y*volPitch+X] += fVal * fOutputScale;  } @@ -248,7 +256,7 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	// TODO: Distance correction? +	// TODO: Update for new projection scaling  	for (int angle = startAngle; angle < endAngle; ++angle)  	{ @@ -299,6 +307,13 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,  #undef TRANSFER_TO_CONSTANT +	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +		double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY); +		double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize; +		tmp[i] = (float)scale; +	} +	cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); +  	delete[] tmp;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize); @@ -346,6 +361,14 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,  #undef TRANSFER_TO_CONSTANT +	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +		double detsize = sqrt(angles[i].fDetUX * angles[i].fDetUX + angles[i].fDetUY * angles[i].fDetUY); +		double scale = (angles[i].fDetUX * (angles[i].fSrcY - angles[i].fDetSY) - angles[i].fDetUY * (angles[i].fSrcX - angles[i].fDetSX)) / detsize; +		tmp[i] = (float)scale; +	} +	cudaMemcpyToSymbol(gC_Scale, tmp, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + +  	delete[] tmp;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize); @@ -389,6 +412,11 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,  #undef TRANSFER_TO_CONSTANT +	double detsize = sqrt(angles[angle].fDetUX * angles[angle].fDetUX + angles[angle].fDetUY * angles[angle].fDetUY); +	double scale = (angles[angle].fDetUX * (angles[angle].fSrcY - angles[angle].fDetSY) - angles[angle].fDetUY * (angles[angle].fSrcX - angles[angle].fDetSX)) / detsize; +	float tmp = (float)scale; +	cudaMemcpyToSymbol(gC_Scale, &tmp, sizeof(float), 0, cudaMemcpyHostToDevice); +  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,  	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index 09a6554..21484b1 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -53,6 +53,7 @@ const unsigned int g_MaxAngles = 2560;  __constant__ float gC_angle_scaled_sin[g_MaxAngles];  __constant__ float gC_angle_scaled_cos[g_MaxAngles];  __constant__ float gC_angle_offset[g_MaxAngles]; +__constant__ float gC_angle_scale[g_MaxAngles];  static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)  { @@ -70,6 +71,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi  	return true;  } +// TODO: Templated version with/without scale? (Or only the global outputscale)  __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x; @@ -97,9 +99,10 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star  		const float scaled_cos_theta = gC_angle_scaled_cos[angle];  		const float scaled_sin_theta = gC_angle_scaled_sin[angle];  		const float TOffset = gC_angle_offset[angle]; +		const float scale = gC_angle_scale[angle];  		const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset; -		fVal += tex2D(gT_projTexture, fT, fA); +		fVal += tex2D(gT_projTexture, fT, fA) * scale;  		fA += 1.0f;  	} @@ -138,6 +141,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s  		const float cos_theta = gC_angle_scaled_cos[angle];  		const float sin_theta = gC_angle_scaled_sin[angle];  		const float TOffset = gC_angle_offset[angle]; +		const float scale = gC_angle_scale[angle];  		float fT = fX * cos_theta - fY * sin_theta + TOffset; @@ -145,7 +149,7 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s  			float fTy = fT;  			fT += fSubStep * cos_theta;  			for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { -				fVal += tex2D(gT_projTexture, fTy, fA); +				fVal += tex2D(gT_projTexture, fTy, fA) * scale;  				fTy -= fSubStep * sin_theta;  			}  		} @@ -172,6 +176,8 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset  	const float fT = fX * angle_cos - fY * angle_sin + offset;  	const float fVal = tex2D(gT_projTexture, fT, 0.5f); +	// NB: The 'scale' constant in devBP has been folded into fOutputScale by the caller here +  	D_volData[Y*volPitch+X] += fVal * fOutputScale;  } @@ -186,27 +192,34 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  	float* angle_scaled_sin = new float[dims.iProjAngles];  	float* angle_scaled_cos = new float[dims.iProjAngles];  	float* angle_offset = new float[dims.iProjAngles]; +	float* angle_scale = new float[dims.iProjAngles];  	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);  	for (unsigned int i = 0; i < dims.iProjAngles; ++i) {  		double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;  		angle_scaled_cos[i] = angles[i].fRayY / d; -		angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs +		angle_scaled_sin[i] = -angles[i].fRayX / d;  		angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d; +		angle_scale[i] = sqrt(angles[i].fRayX * angles[i].fRayX + angles[i].fRayY * angles[i].fRayY) / abs(d);  	} +	//fprintf(stderr, "outputscale in BP_internal: %f, %f\n", fOutputScale, angle_scale[0]); +	//fprintf(stderr, "ray in BP_internal: %f,%f (length %f)\n", angles[0].fRayX, angles[0].fRayY, sqrt(angles[0].fRayX * angles[0].fRayX + angles[0].fRayY * angles[0].fRayY));  	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);  	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);  	cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); +	cudaError_t e4 = cudaMemcpyToSymbol(gC_angle_scale, angle_scale, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);  	assert(e1 == cudaSuccess);  	assert(e2 == cudaSuccess);  	assert(e3 == cudaSuccess); +	assert(e4 == cudaSuccess);  	delete[] angle_scaled_sin;  	delete[] angle_scaled_cos;  	delete[] angle_offset; +	delete[] angle_scale;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, @@ -267,6 +280,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,  	float angle_scaled_cos = angles[angle].fRayY / d;  	float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs  	float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d; +	fOutputScale *= sqrt(angles[angle].fRayX * angles[angle].fRayX + angles[angle].fRayY * angles[angle].fRayY) / abs(d);  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index feebda2..2a7ec80 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -140,8 +140,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng  				if (FDKWEIGHT) {  					// The correct factor here is this one:  					// Z[idx] += (fr*fCd.w)*(fr*fCd.w)*fVal; -					// This is the square of the inverse magnification factor -					// from fX,fY,fZ to the detector. +					// This is the square of the magnification factor +					// from fX,fY,fZ to the virtual detector, where the +					// virtual detector is the plane through the origin +					// parallel to the detector, so spanned by u,v.  					// Since we are assuming we have a circular cone  					// beam trajectory, fCd.w is constant, and we instead diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp index 1e81390..939a026 100644 --- a/src/CudaReconstructionAlgorithm2D.cpp +++ b/src/CudaReconstructionAlgorithm2D.cpp @@ -309,8 +309,7 @@ void CCudaReconstructionAlgorithm2D::run(int _iNrIterations)  		m_bAlgoInit = true;  	} -	float fPixelSize = volgeom.getPixelLengthX(); -	float fSinogramScale = 1.0f/(fPixelSize*fPixelSize); +	float fSinogramScale = 1.0f;  	ok = m_pAlgo->copyDataToGPU(m_pSinogram->getDataConst(), m_pSinogram->getGeometry()->getDetectorCount(), fSinogramScale,  	                            m_pReconstruction->getDataConst(), volgeom.getGridColCount(), diff --git a/tests/python/test_line2d.py b/tests/python/test_line2d.py index 4c3d174..8b6e200 100644 --- a/tests/python/test_line2d.py +++ b/tests/python/test_line2d.py @@ -586,10 +586,66 @@ class Test2DKernel(unittest.TestCase):        else:          raise RuntimeError("Unsupported projector") +  def single_test_adjoint(self, type, proj_type): +      shape = np.random.randint(*range2d, size=2) +      if FLEXVOL: +          if not NONSQUARE: +            pixsize = np.array([0.5, 0.5]) + np.random.random() +          else: +            pixsize = 0.5 + np.random.random(size=2) +          origin = 10 * np.random.random(size=2) +      else: +          pixsize = (1.,1.) +          origin = (0.,0.) +      vg = astra.create_vol_geom(shape[1], shape[0], +                                 origin[0] - 0.5 * shape[0] * pixsize[0], +                                 origin[0] + 0.5 * shape[0] * pixsize[0], +                                 origin[1] - 0.5 * shape[1] * pixsize[1], +                                 origin[1] + 0.5 * shape[1] * pixsize[1]) + +      if type == 'parallel': +        pg = gen_random_geometry_parallel() +        projector_id = astra.create_projector(proj_type, pg, vg) +      elif type == 'parallel_vec': +        pg = gen_random_geometry_parallel_vec() +        projector_id = astra.create_projector(proj_type, pg, vg) +      elif type == 'fanflat': +        pg = gen_random_geometry_fanflat() +        projector_id = astra.create_projector(proj_type_to_fan(proj_type), pg, vg) +      elif type == 'fanflat_vec': +        pg = gen_random_geometry_fanflat_vec() +        projector_id = astra.create_projector(proj_type_to_fan(proj_type), pg, vg) + +      for i in range(5): +        X = np.random.random((shape[1], shape[0])) +        Y = np.random.random(astra.geom_size(pg)) + +        sinogram_id, fX = astra.create_sino(X, projector_id) +        bp_id, fTY = astra.create_backprojection(Y, projector_id) + +        astra.data2d.delete(sinogram_id) +        astra.data2d.delete(bp_id) + +        da = np.dot(fX.ravel(), Y.ravel()) +        db = np.dot(X.ravel(), fTY.ravel()) +        m = np.abs(da - db) +        TOL = 1e-3 if 'cuda' not in proj_type else 1e-1 +        if m / da >= TOL: +          print(vg) +          print(pg) +          print(m/da, da/db, da, db) +        self.assertTrue(m / da < TOL) +      astra.projector.delete(projector_id) + +    def multi_test(self, type, proj_type):      np.random.seed(seed)      for _ in range(nloops):        self.single_test(type, proj_type) +  def multi_test_adjoint(self, type, proj_type): +    np.random.seed(seed) +    for _ in range(nloops): +      self.single_test_adjoint(type, proj_type)  __combinations = { 'parallel': [ 'line', 'linear', 'distance_driven', 'strip', 'cuda' ],                     'parallel_vec': [ 'line', 'linear', 'distance_driven', 'strip', 'cuda' ], @@ -600,7 +656,10 @@ for k, l in __combinations.items():    for v in l:      def f(k,v):        return lambda self: self.multi_test(k, v) +    def f_adj(k,v): +      return lambda self: self.multi_test_adjoint(k, v)      setattr(Test2DKernel, 'test_' + k + '_' + v, f(k,v)) +    setattr(Test2DKernel, 'test_' + k + '_' + v + '_adjoint', f_adj(k,v))  if __name__ == '__main__':    unittest.main() | 
