diff options
Diffstat (limited to 'cuda/2d')
| -rw-r--r-- | cuda/2d/algo.cu | 7 | ||||
| -rw-r--r-- | cuda/2d/algo.h | 3 | ||||
| -rw-r--r-- | cuda/2d/astra.cu | 12 | ||||
| -rw-r--r-- | cuda/2d/cgls.cu | 4 | ||||
| -rw-r--r-- | cuda/2d/em.cu | 4 | ||||
| -rw-r--r-- | cuda/2d/fan_bp.cu | 47 | ||||
| -rw-r--r-- | cuda/2d/fan_bp.h | 9 | ||||
| -rw-r--r-- | cuda/2d/fft.cu | 46 | ||||
| -rw-r--r-- | cuda/2d/par_bp.cu | 31 | ||||
| -rw-r--r-- | cuda/2d/par_bp.h | 4 | ||||
| -rw-r--r-- | cuda/2d/sart.cu | 13 | ||||
| -rw-r--r-- | cuda/2d/sart.h | 6 | ||||
| -rw-r--r-- | cuda/2d/sirt.cu | 14 | ||||
| -rw-r--r-- | cuda/2d/sirt.h | 4 | 
14 files changed, 134 insertions, 70 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index 144fabd..dc74e51 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,  }  bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch, -                       float* D_projData, unsigned int projPitch) +                       float* D_projData, unsigned int projPitch, +                       float outputScale)  {  	if (angles) {  		assert(!fanProjs);  		return BP(D_volumeData, volumePitch, D_projData, projPitch, -		          dims, angles, TOffsets); +		          dims, angles, TOffsets, outputScale);  	} else {  		assert(fanProjs);  		return FanBP(D_volumeData, volumePitch, D_projData, projPitch, -		             dims, fanProjs); +		             dims, fanProjs, outputScale);  	}  } diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index a75905e..99959c8 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -118,7 +118,8 @@ protected:  	            float* D_projData, unsigned int projPitch,  	            float outputScale);  	bool callBP(float* D_volumeData, unsigned int volumePitch, -	            float* D_projData, unsigned int projPitch); +	            float* D_projData, unsigned int projPitch, +	            float outputScale);  	SDimensions dims; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 379c690..c1e6566 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -368,21 +368,19 @@ bool AstraFBP::run()  	} +	float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; +  	if (pData->bFanBeam) { -		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections); +		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);  	} else { -		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets); +		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);  	}  	if(!ok)  	{  		return false;  	} -	processVol<opMul>(pData->D_volumeData, -	                      (M_PI / 2.0f) / (float)pData->dims.iProjAngles, -	                      pData->volumePitch, pData->dims); -  	return true;  } @@ -594,7 +592,7 @@ bool BPalgo::iterate(unsigned int)  {  	// TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant  	zeroVolumeData(D_volumeData, volumePitch, dims); -	callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch); +	callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f);  	return true;  } diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index 9ead563..f402914 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations)  		// p = A'*r  		zeroVolumeData(D_p, pPitch, dims); -		callBP(D_p, pPitch, D_r, rPitch); +		callBP(D_p, pPitch, D_r, rPitch, 1.0f);  		if (useVolumeMask)  			processVol<opMul>(D_p, D_maskData, pPitch, dims); @@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations)  		// z = A'*r  		zeroVolumeData(D_z, zPitch, dims); -		callBP(D_z, zPitch, D_r, rPitch); +		callBP(D_z, zPitch, D_r, rPitch, 1.0f);  		if (useVolumeMask)  			processVol<opMul>(D_z, D_maskData, zPitch, dims); diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 00127c0..8593b08 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -102,7 +102,7 @@ bool EM::precomputeWeights()  #endif  	{  		processSino<opSet>(D_projData, 1.0f, projPitch, dims); -		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); +		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);  	}  	processVol<opInvert>(D_pixelWeight, pixelPitch, dims); @@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations)  		// Do BP of projData into tmpData  		zeroVolumeData(D_tmpData, tmpPitch, dims); -		callBP(D_tmpData, tmpPitch, D_projData, projPitch); +		callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);  		// Multiply volumeData with tmpData divided by pixel weights  		processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims); diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu index 74e8b12..b4321ba 100644 --- a/cuda/2d/fan_bp.cu +++ b/cuda/2d/fan_bp.cu @@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi  	return true;  } -__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s  		fA += 1.0f;  	} -	volData[Y*volPitch+X] += fVal; +	volData[Y*volPitch+X] += fVal * fOutputScale;  }  // supersampling version -__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in  	float* volData = (float*)D_volData; +	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); +  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; @@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in  		fA += 1.0f;  	} -	volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); +	volData[Y*volPitch+X] += fVal * fOutputScale;  }  // BP specifically for SART.  // It includes (free) weighting with voxel weight.  // It assumes the proj texture is set up _without_ padding, unlike regular BP. -__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims) +__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi  	const float fT = fNum / fDen;  	const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f); -	volData[Y*volPitch+X] += fVal; +	volData[Y*volPitch+X] += fVal * fOutputScale;  }  // Weighted BP for use in fan beam FBP  // Each pixel/ray is weighted by 1/L^2 where L is the distance to the source. -__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims) +__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un  		fA += 1.0f;  	} -	volData[Y*volPitch+X] += fVal; +	volData[Y*volPitch+X] += fVal * fOutputScale;  }  bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,             float* D_projData, unsigned int projPitch, -           const SDimensions& dims, const SFanProjection* angles) +           const SDimensions& dims, const SFanProjection* angles, +           float fOutputScale)  {  	assert(dims.iProjAngles <= g_MaxAngles); @@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,  	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {  		if (dims.iRaysPerPixelDim > 1) -			devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); +			devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  		else -			devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); +			devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  	}  	cudaThreadSynchronize(); @@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,  bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,             float* D_projData, unsigned int projPitch, -           const SDimensions& dims, const SFanProjection* angles) +           const SDimensions& dims, const SFanProjection* angles, +           float fOutputScale)  {  	assert(dims.iProjAngles <= g_MaxAngles); @@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,  	cudaStreamCreate(&stream);  	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { -		devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims); +		devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  	}  	cudaThreadSynchronize(); @@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,  bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,                  float* D_projData, unsigned int projPitch,                  unsigned int angle, -                const SDimensions& dims, const SFanProjection* angles) +                const SDimensions& dims, const SFanProjection* angles, +                float fOutputScale)  {  	// only one angle  	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); @@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,  	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); -	devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims); +	devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);  	cudaThreadSynchronize();  	cudaTextForceKernelsCompletion(); @@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,  bool FanBP(float* D_volumeData, unsigned int volumePitch,             float* D_projData, unsigned int projPitch, -           const SDimensions& dims, const SFanProjection* angles) +           const SDimensions& dims, const SFanProjection* angles, +           float fOutputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,  		bool ret;  		ret = FanBP_internal(D_volumeData, volumePitch,  		                  D_projData + iAngle * projPitch, projPitch, -		                  subdims, angles + iAngle); +		                  subdims, angles + iAngle, fOutputScale);  		if (!ret)  			return false;  	} @@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,  bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,             float* D_projData, unsigned int projPitch, -           const SDimensions& dims, const SFanProjection* angles) +           const SDimensions& dims, const SFanProjection* angles, +           float fOutputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,  		bool ret;  		ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch,  		                  D_projData + iAngle * projPitch, projPitch, -		                  subdims, angles + iAngle); +		                  subdims, angles + iAngle, fOutputScale);  		if (!ret)  			return false; @@ -498,7 +505,7 @@ int main()  	copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);  	copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch); -	FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs); +	FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);  	copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch); diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h index e4e69b0..3ebe1e8 100644 --- a/cuda/2d/fan_bp.h +++ b/cuda/2d/fan_bp.h @@ -33,16 +33,19 @@ namespace astraCUDA {  _AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch,             float* D_projData, unsigned int projPitch, -           const SDimensions& dims, const SFanProjection* angles); +           const SDimensions& dims, const SFanProjection* angles, +           float fOutputScale);  _AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,                  float* D_projData, unsigned int projPitch,                  unsigned int angle, -                const SDimensions& dims, const SFanProjection* angles); +                const SDimensions& dims, const SFanProjection* angles, +                float fOutputScale);  _AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,             float* D_projData, unsigned int projPitch, -           const SDimensions& dims, const SFanProjection* angles); +           const SDimensions& dims, const SFanProjection* angles, +           float fOutputScale);  } diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2bfd493..2d259a9 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -35,6 +35,7 @@ $Id$  #include <fstream>  #include "../../include/astra/Logging.h" +#include "astra/Fourier.h"  using namespace astra; @@ -303,16 +304,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  	float * pfFilt = new float[_iFFTFourierDetectorCount];  	float * pfW = new float[_iFFTFourierDetectorCount]; +	// We cache one Fourier transform for repeated FBP's of the same size +	static float *pfData = 0; +	static int iFilterCacheSize = 0; + +	if (!pfData || iFilterCacheSize != _iFFTRealDetectorCount) { +		// Compute filter in spatial domain + +		delete[] pfData; +		pfData = new float[2*_iFFTRealDetectorCount]; +		int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)]; +		ip[0] = 0; +		float32 *w = new float32[_iFFTRealDetectorCount/2]; + +		for (int i = 0; i < _iFFTRealDetectorCount; ++i) { +			pfData[2*i+1] = 0.0f; + +			if (i & 1) { +				int j = i; +				if (2*j > _iFFTRealDetectorCount) +					j = _iFFTRealDetectorCount - j; +				float f = M_PI * j; +				pfData[2*i] = -1 / (f*f); +			} else { +				pfData[2*i] = 0.0f; +			} +		} + +		pfData[0] = 0.25f; + +		cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w); +		delete[] ip; +		delete[] w; + +		iFilterCacheSize = _iFFTRealDetectorCount; +	} +  	for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)  	{  		float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; -		// filt = 2*( 0:(order/2) )./order; -		pfFilt[iDetectorIndex] = 2.0f * fRelIndex; -		//pfFilt[iDetectorIndex] = 1.0f; - -		// w = 2*pi*(0:size(filt,2)-1)/order -		pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex; +		pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex]; +		pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex;  	}  	switch(_eFilter) @@ -866,5 +899,4 @@ void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount,  	free(pfHostFilter);  } -  #endif diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index 635200f..d9f7325 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi  	return true;  } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star  	} -	volData[Y*volPitch+X] += fVal; +	volData[Y*volPitch+X] += fVal * fOutputScale;  }  // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s  	float fA = startAngle + 0.5f;  	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; +	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); +  	if (offsets) {  		for (int angle = startAngle; angle < endAngle; ++angle) @@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s  	} -	volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); +	volData[Y*volPitch+X] += fVal * fOutputScale;  } -__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims) +__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset  	const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;  	const float fVal = tex2D(gT_projTexture, fT, 0.5f); -	D_volData[Y*volPitch+X] += fVal; +	D_volData[Y*volPitch+X] += fVal * fOutputScale;  }  bool BP_internal(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, const float* TOffsets) +        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)  {  	// TODO: process angles block by block  	assert(dims.iProjAngles <= g_MaxAngles); @@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {  		if (dims.iRaysPerPixelDim > 1) -			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); +			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);  		else -			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims); +			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);  	}  	cudaThreadSynchronize(); @@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  bool BP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, const float* TOffsets) +        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch,  		ret = BP_internal(D_volumeData, volumePitch,  		                  D_projData + iAngle * projPitch, projPitch,  		                  subdims, angles + iAngle, -		                  TOffsets ? TOffsets + iAngle : 0); +		                  TOffsets ? TOffsets + iAngle : 0, +		                  fOutputScale);  		if (!ret)  			return false;  	} @@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,  bool BP_SART(float* D_volumeData, unsigned int volumePitch,               float* D_projData, unsigned int projPitch,               unsigned int angle, const SDimensions& dims, -             const float* angles, const float* TOffsets) +             const float* angles, const float* TOffsets, float fOutputScale)  {  	// Only one angle.  	// We need to Clamp to the border pixels instead of to zero, because @@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,  	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); -	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims); +	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);  	cudaThreadSynchronize();  	cudaTextForceKernelsCompletion(); @@ -369,7 +372,7 @@ int main()  	for (unsigned int i = 0; i < dims.iProjAngles; ++i)  		angle[i] = i*(M_PI/dims.iProjAngles); -	BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0); +	BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);  	delete[] angle; diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index eaeafd8..64bcd34 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -36,12 +36,12 @@ namespace astraCUDA {  _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch,          const SDimensions& dims, const float* angles, -        const float* TOffsets); +        const float* TOffsets, float fOutputScale);  _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,               float* D_projData, unsigned int projPitch,               unsigned int angle, const SDimensions& dims, -             const float* angles, const float* TOffsets); +             const float* angles, const float* TOffsets, float fOutputScale);  } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index 29670c3..c8608a3 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -71,6 +71,8 @@ SART::SART() : ReconAlgo()  	projectionCount = 0;  	iteration = 0;  	customOrder = false; + +	fRelaxation = 1.0f;  } @@ -98,6 +100,7 @@ void SART::reset()  	projectionCount = 0;  	iteration = 0;  	customOrder = false; +	fRelaxation = 1.0f;  	ReconAlgo::reset();  } @@ -200,10 +203,10 @@ bool SART::iterate(unsigned int iterations)  			// BP, mask, and add back  			// TODO: Try putting the masking directly in the BP  			zeroVolumeData(D_tmpData, tmpPitch, dims); -			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle); +			callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, fRelaxation);  			processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);  		} else { -			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle); +			callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, fRelaxation);  		}  		if (useMinConstraint) @@ -264,16 +267,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,  bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch, -                       unsigned int angle) +                       unsigned int angle, float outputScale)  {  	if (angles) {  		assert(!fanProjs);  		return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, -		               angle, dims, angles, TOffsets); +		               angle, dims, angles, TOffsets, outputScale);  	} else {  		assert(fanProjs);  		return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, -		                  angle, dims, fanProjs); +		                  angle, dims, fanProjs, outputScale);  	}  } diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h index 6574a6f..eff9ecf 100644 --- a/cuda/2d/sart.h +++ b/cuda/2d/sart.h @@ -50,6 +50,8 @@ public:  	virtual float computeDiffNorm(); +	void setRelaxation(float r) { fRelaxation = r; } +  protected:  	void reset();  	bool precomputeWeights(); @@ -59,7 +61,7 @@ protected:  	                 unsigned int angle, float outputScale);  	bool callBP_SART(float* D_volumeData, unsigned int volumePitch,  	                 float* D_projData, unsigned int projPitch, -	                 unsigned int angle); +	                 unsigned int angle, float outputScale);  	// projection angle variables @@ -78,6 +80,8 @@ protected:  	// Geometry-specific precomputed data  	float* D_lineWeight;  	unsigned int linePitch; + +	float fRelaxation;  };  } diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index a6194a5..4baaccb 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -50,6 +50,8 @@ SIRT::SIRT() : ReconAlgo()  	D_minMaskData = 0;  	D_maxMaskData = 0; +	fRelaxation = 1.0f; +  	freeMinMaxMasks = false;  } @@ -83,6 +85,8 @@ void SIRT::reset()  	useVolumeMask = false;  	useSinogramMask = false; +	fRelaxation = 1.0f; +  	ReconAlgo::reset();  } @@ -127,10 +131,10 @@ bool SIRT::precomputeWeights()  	zeroVolumeData(D_pixelWeight, pixelPitch, dims);  	if (useSinogramMask) { -		callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); +		callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f);  	} else {  		processSino<opSet>(D_projData, 1.0f, projPitch, dims); -		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); +		callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);  	}  	processVol<opInvert>(D_pixelWeight, pixelPitch, dims); @@ -139,6 +143,9 @@ bool SIRT::precomputeWeights()  		processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims);  	} +	// Also fold the relaxation factor into pixel weights +	processVol<opMul>(D_pixelWeight, fRelaxation, pixelPitch, dims); +  	return true;  } @@ -251,8 +258,9 @@ bool SIRT::iterate(unsigned int iterations)  		zeroVolumeData(D_tmpData, tmpPitch, dims); -		callBP(D_tmpData, tmpPitch, D_projData, projPitch); +		callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f); +		// pixel weights also contain the volume mask and relaxation factor  		processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);  		if (useMinConstraint) diff --git a/cuda/2d/sirt.h b/cuda/2d/sirt.h index 21094a1..bc913f4 100644 --- a/cuda/2d/sirt.h +++ b/cuda/2d/sirt.h @@ -53,6 +53,8 @@ public:  	bool uploadMinMaxMasks(const float* minMaskData, const float* maxMaskData,  	                       unsigned int pitch); +	void setRelaxation(float r) { fRelaxation = r; } +  	virtual bool iterate(unsigned int iterations);  	virtual float computeDiffNorm(); @@ -81,6 +83,8 @@ protected:  	unsigned int minMaskPitch;  	float* D_maxMaskData;  	unsigned int maxMaskPitch; + +	float fRelaxation;  };  bool doSIRT(float* D_volumeData, unsigned int volumePitch,  | 
