diff options
| -rw-r--r-- | cuda/2d/fft.cu | 124 | 
1 files changed, 58 insertions, 66 deletions
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2cdb7c3..08acfd4 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -40,33 +40,18 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  using namespace astra; -// TODO: evaluate what we want to do in these situations: - -#define CHECK_ERROR(errorMessage) do {                                     \ -  cudaError_t err = cudaThreadSynchronize();                               \ -  if( cudaSuccess != err) {                                                \ -      ASTRA_ERROR("Cuda error %s : %s",                                    \ -              errorMessage,cudaGetErrorString( err));                      \ -      exit(EXIT_FAILURE);                                                  \ -  } } while (0) - -#define SAFE_CALL( call) do {                                              \ -  cudaError err = call;                                                    \ -  if( cudaSuccess != err) {                                                \ -      ASTRA_ERROR("Cuda error: %s ",                                       \ -              cudaGetErrorString( err));                                   \ -      exit(EXIT_FAILURE);                                                  \ -  }                                                                        \ -  err = cudaThreadSynchronize();                                           \ -  if( cudaSuccess != err) {                                                \ -      ASTRA_ERROR("Cuda error: %s : ",                                     \ -              cudaGetErrorString( err));                                   \ -      exit(EXIT_FAILURE);                                                  \ -  } } while (0) - -  namespace astraCUDA { +bool checkCufft(cufftResult err, const char *msg) +{ +	if (err != CUFFT_SUCCESS) { +		ASTRA_ERROR("%s: CUFFT error %d.", msg, err); +		return false; +	} else { +		return true; +	} +} +  __global__ static void applyFilter_kernel(int _iProjectionCount,                                            int _iFreqBinCount,                                            cufftComplex * _pSinogram, @@ -115,7 +100,8 @@ static void rescaleInverseFourier(int _iProjectionCount, int _iDetectorCount,  	rescaleInverseFourier_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,  	                                                            _iDetectorCount,  	                                                            _pfInFourierOutput); -	CHECK_ERROR("rescaleInverseFourier_kernel failed"); + +	checkCuda(cudaThreadSynchronize(), "rescaleInverseFourier");  }  void applyFilter(int _iProjectionCount, int _iFreqBinCount, @@ -128,7 +114,8 @@ void applyFilter(int _iProjectionCount, int _iFreqBinCount,  	applyFilter_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,  	                                                  _iFreqBinCount,  	                                                  _pSinogram, _pFilter); -	CHECK_ERROR("applyFilter_kernel failed"); + +	checkCuda(cudaThreadSynchronize(), "applyFilter");  }  static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, @@ -136,24 +123,22 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,                            cufftComplex * _pDevTargetComplex)  {  	cufftHandle plan; -	cufftResult result; -	result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount); -	if(result != CUFFT_SUCCESS) -	{ -		ASTRA_ERROR("Failed to plan 1d r2c fft"); +	if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount), "invokeCudaFFT plan")) {  		return false;  	} -	result = cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex); -	cufftDestroy(plan); +	if (!checkCufft(cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex), "invokeCudaFFT exec")) { +		cufftDestroy(plan); +		return false; +	} -	if(result != CUFFT_SUCCESS) -	{ -		ASTRA_ERROR("Failed to exec 1d r2c fft"); +	if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaFFT sync")) { +		cufftDestroy(plan);  		return false;  	} +	cufftDestroy(plan);  	return true;  } @@ -162,26 +147,25 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount,                             float * _pfDevTarget)  {  	cufftHandle plan; -	cufftResult result; -	result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount); -	if(result != CUFFT_SUCCESS) -	{ -		ASTRA_ERROR("Failed to plan 1d c2r fft"); +	if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount), "invokeCudaIFFT plan")) {  		return false;  	} -	// todo: why do we have to get rid of the const qualifier? -	result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, -	                      (cufftReal *)_pfDevTarget); -	cufftDestroy(plan); - -	if(result != CUFFT_SUCCESS) +	// Getting rid of the const qualifier is due to cufft API issue? +	if (!checkCufft(cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, +	                      (cufftReal *)_pfDevTarget), "invokeCudaIFFT exec"))  	{ -		ASTRA_ERROR("Failed to exec 1d c2r fft"); +		cufftDestroy(plan);  		return false;  	} +	if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaIFFT sync")) { +		cufftDestroy(plan); +		return false; +	} + +	cufftDestroy(plan);  	return true;  } @@ -189,14 +173,12 @@ bool allocateComplexOnDevice(int _iProjectionCount, int _iDetectorCount,                               cufftComplex ** _ppDevComplex)  {  	size_t bufferSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount; -	SAFE_CALL(cudaMalloc((void **)_ppDevComplex, bufferSize)); -	return true; +	return checkCuda(cudaMalloc((void **)_ppDevComplex, bufferSize), "fft allocateComplexOnDevice");  }  bool freeComplexOnDevice(cufftComplex * _pDevComplex)  { -	SAFE_CALL(cudaFree(_pDevComplex)); -	return true; +	return checkCuda(cudaFree(_pDevComplex), "fft freeComplexOnDevice");  }  bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount, @@ -204,9 +186,7 @@ bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount,                                  cufftComplex * _pDevComplexTarget)  {  	size_t memSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount; -	SAFE_CALL(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice)); - -	return true; +	return checkCuda(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice), "fft uploadComplexArrayToDevice");  }  bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, @@ -217,25 +197,30 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,  	float * pfDevRealFFTSource = NULL;  	size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount; -	SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize)); -	SAFE_CALL(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize)); +	if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize), "runCudaFFT malloc")) +		return false; +	if (!checkCuda(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize), "runCudaFFT memset")) { +		cudaFree(pfDevRealFFTSource); +		return false; +	}  	for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)  	{  		const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;  		float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount; -		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); +		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) { +			cudaFree(pfDevRealFFTSource); +			return false; +		}  	}  	bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,  	                             pfDevRealFFTSource, _pDevTargetComplex);  	if(!bResult) -	{  		return false; -	} -	SAFE_CALL(cudaFree(pfDevRealFFTSource)); +	cudaFree(pfDevRealFFTSource);  	return true;  } @@ -248,7 +233,8 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  	float * pfDevRealFFTTarget = NULL;  	size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount; -	SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize)); +	if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize), "runCudaIFFT malloc")) +		return false;  	bool bResult = invokeCudaIFFT(_iProjectionCount, _iFFTRealDetectorCount,  	                              _pDevSourceComplex, pfDevRealFFTTarget); @@ -260,17 +246,23 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  	rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount,  	                      pfDevRealFFTTarget); -	SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch)); +	if (!checkCuda(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch), "runCudaIFFT memset")) { +		cudaFree(pfDevRealFFTTarget); +		return false; +	}  	for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)  	{  		const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;  		float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch; -		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); +		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) { +			cudaFree(pfDevRealFFTTarget); +			return false; +		}  	} -	SAFE_CALL(cudaFree(pfDevRealFFTTarget)); +	cudaFree(pfDevRealFFTTarget);  	return true;  }  | 
