From 4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn
Date: Tue, 16 Nov 2021 12:19:07 +0100
Subject: Remove unnecessary costly syncs in FFT

---
 cuda/2d/fft.cu | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 413f3aa..e72ee85 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -232,7 +232,10 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
 		const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;
 		float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount;
 
-		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) {
+			cudaFree(pfDevRealFFTSource);
+			return false;
+		}
 	}
 
 	bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,
@@ -274,7 +277,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
 		const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;
 		float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch;
 
-		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) {
+			cudaFree(pfDevRealFFTTarget);
+			return false;
+		}
 	}
 
 	SAFE_CALL(cudaFree(pfDevRealFFTTarget));
-- 
cgit v1.2.3