From 63de6635ac1e5b232f88e90afdf6492df2682a38 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 11:54:47 +0100 Subject: Add checkCufft function --- cuda/2d/fft.cu | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2cdb7c3..2bb5dc3 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -67,6 +67,16 @@ using namespace astra; namespace astraCUDA { +bool checkCufft(cufftResult err, const char *msg) +{ + if (err != CUFFT_SUCCESS) { + ASTRA_ERROR("%s: CUFFT error %d.", msg, err); + return false; + } else { + return true; + } +} + __global__ static void applyFilter_kernel(int _iProjectionCount, int _iFreqBinCount, cufftComplex * _pSinogram, @@ -136,24 +146,18 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, cufftComplex * _pDevTargetComplex) { cufftHandle plan; - cufftResult result; - result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount); - if(result != CUFFT_SUCCESS) - { - ASTRA_ERROR("Failed to plan 1d r2c fft"); + if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount), "invokeCudaFFT plan")) { return false; } - result = cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex); - cufftDestroy(plan); - - if(result != CUFFT_SUCCESS) - { - ASTRA_ERROR("Failed to exec 1d r2c fft"); + if (!checkCufft(cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex), "invokeCudaFFT exec")) { + cufftDestroy(plan); return false; } + cufftDestroy(plan); + return true; } @@ -162,26 +166,21 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount, float * _pfDevTarget) { cufftHandle plan; - cufftResult result; - result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount); - if(result != CUFFT_SUCCESS) - { - ASTRA_ERROR("Failed to plan 1d c2r fft"); + if (!checkCufft(cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, 
_iProjectionCount), "invokeCudaIFFT plan")) { return false; } - // todo: why do we have to get rid of the const qualifier? - result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, - (cufftReal *)_pfDevTarget); - cufftDestroy(plan); - - if(result != CUFFT_SUCCESS) + // Getting rid of the const qualifier is due to cufft API issue? + if (!checkCufft(cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, + (cufftReal *)_pfDevTarget), "invokeCudaIFFT exec")) { - ASTRA_ERROR("Failed to exec 1d c2r fft"); + cufftDestroy(plan); return false; } + cufftDestroy(plan); + return true; } -- cgit v1.2.3 From 7f5a50d5b142fe8aeea22754b9895d1fae25e662 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 11:58:04 +0100 Subject: Add missing synchronize before free in FFT --- cuda/2d/fft.cu | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2bb5dc3..413f3aa 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -156,8 +156,12 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, return false; } - cufftDestroy(plan); + if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaFFT sync")) { + cufftDestroy(plan); + return false; + } + cufftDestroy(plan); return true; } @@ -179,8 +183,12 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount, return false; } - cufftDestroy(plan); + if (!checkCuda(cudaDeviceSynchronize(), "invokeCudaIFFT sync")) { + cufftDestroy(plan); + return false; + } + cufftDestroy(plan); return true; } -- cgit v1.2.3 From 4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 12:19:07 +0100 Subject: Remove unnecessary costly syncs in FFT --- cuda/2d/fft.cu | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 413f3aa..e72ee85 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -232,7 
+232,10 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch; float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount; - SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); + if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) { + cudaFree(pfDevRealFFTSource); + return false; + } } bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount, @@ -274,7 +277,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount; float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch; - SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); + if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) { + cudaFree(pfDevRealFFTTarget); + return false; + } } SAFE_CALL(cudaFree(pfDevRealFFTTarget)); -- cgit v1.2.3 From 063c97d04a757e3c288dcf156a63bf1e0ffd074e Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Tue, 16 Nov 2021 13:51:56 +0100 Subject: Remove fft.cu custom cuda error handling macros --- cuda/2d/fft.cu | 63 ++++++++++++++++++++-------------------------------------- 1 file changed, 21 insertions(+), 42 deletions(-) (limited to 'cuda') diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index e72ee85..08acfd4 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -40,31 +40,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
using namespace astra; -// TODO: evaluate what we want to do in these situations: - -#define CHECK_ERROR(errorMessage) do { \ - cudaError_t err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - ASTRA_ERROR("Cuda error %s : %s", \ - errorMessage,cudaGetErrorString( err)); \ - exit(EXIT_FAILURE); \ - } } while (0) - -#define SAFE_CALL( call) do { \ - cudaError err = call; \ - if( cudaSuccess != err) { \ - ASTRA_ERROR("Cuda error: %s ", \ - cudaGetErrorString( err)); \ - exit(EXIT_FAILURE); \ - } \ - err = cudaThreadSynchronize(); \ - if( cudaSuccess != err) { \ - ASTRA_ERROR("Cuda error: %s : ", \ - cudaGetErrorString( err)); \ - exit(EXIT_FAILURE); \ - } } while (0) - - namespace astraCUDA { bool checkCufft(cufftResult err, const char *msg) @@ -125,7 +100,8 @@ static void rescaleInverseFourier(int _iProjectionCount, int _iDetectorCount, rescaleInverseFourier_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount, _iDetectorCount, _pfInFourierOutput); - CHECK_ERROR("rescaleInverseFourier_kernel failed"); + + checkCuda(cudaThreadSynchronize(), "rescaleInverseFourier"); } void applyFilter(int _iProjectionCount, int _iFreqBinCount, @@ -138,7 +114,8 @@ void applyFilter(int _iProjectionCount, int _iFreqBinCount, applyFilter_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount, _iFreqBinCount, _pSinogram, _pFilter); - CHECK_ERROR("applyFilter_kernel failed"); + + checkCuda(cudaThreadSynchronize(), "applyFilter"); } static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, @@ -196,14 +173,12 @@ bool allocateComplexOnDevice(int _iProjectionCount, int _iDetectorCount, cufftComplex ** _ppDevComplex) { size_t bufferSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount; - SAFE_CALL(cudaMalloc((void **)_ppDevComplex, bufferSize)); - return true; + return checkCuda(cudaMalloc((void **)_ppDevComplex, bufferSize), "fft allocateComplexOnDevice"); } bool freeComplexOnDevice(cufftComplex * _pDevComplex) { - SAFE_CALL(cudaFree(_pDevComplex)); - 
return true; + return checkCuda(cudaFree(_pDevComplex), "fft freeComplexOnDevice"); } bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount, @@ -211,9 +186,7 @@ bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount, cufftComplex * _pDevComplexTarget) { size_t memSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount; - SAFE_CALL(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice)); - - return true; + return checkCuda(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice), "fft uploadComplexArrayToDevice"); } bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, @@ -224,8 +197,12 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, float * pfDevRealFFTSource = NULL; size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount; - SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize)); - SAFE_CALL(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize)); + if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize), "runCudaFFT malloc")) + return false; + if (!checkCuda(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize), "runCudaFFT memset")) { + cudaFree(pfDevRealFFTSource); + return false; + } for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++) { @@ -241,11 +218,9 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount, pfDevRealFFTSource, _pDevTargetComplex); if(!bResult) - { return false; - } - SAFE_CALL(cudaFree(pfDevRealFFTSource)); + cudaFree(pfDevRealFFTSource); return true; } @@ -258,7 +233,8 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, float * pfDevRealFFTTarget = NULL; size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount; - SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTTarget, 
bufferMemSize)); + if (!checkCuda(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize), "runCudaIFFT malloc")) + return false; bool bResult = invokeCudaIFFT(_iProjectionCount, _iFFTRealDetectorCount, _pDevSourceComplex, pfDevRealFFTTarget); @@ -270,7 +246,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount, pfDevRealFFTTarget); - SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch)); + if (!checkCuda(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch), "runCudaIFFT memset")) { + cudaFree(pfDevRealFFTTarget); + return false; + } for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++) { @@ -283,7 +262,7 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, } } - SAFE_CALL(cudaFree(pfDevRealFFTTarget)); + cudaFree(pfDevRealFFTTarget); return true; } -- cgit v1.2.3