diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 12:19:07 +0100 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 14:08:57 +0100 |
commit | 4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1 (patch) | |
tree | 05bf29ce288b24d52f2a40b40bc1b64ae75aeee0 /cuda/2d | |
parent | 7f5a50d5b142fe8aeea22754b9895d1fae25e662 (diff) | |
download | astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.tar.gz astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.tar.bz2 astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.tar.xz astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.zip |
Remove unnecessary costly syncs in FFT
Diffstat (limited to 'cuda/2d')
-rw-r--r-- | cuda/2d/fft.cu | 10 |
1 file changed, 8 insertions, 2 deletions
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 413f3aa..e72ee85 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -232,7 +232,10 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource, const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch; float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount; - SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); + if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) { + cudaFree(pfDevRealFFTSource); + return false; + } } bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount, @@ -274,7 +277,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount; float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch; - SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice)); + if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) { + cudaFree(pfDevRealFFTTarget); + return false; + } } SAFE_CALL(cudaFree(pfDevRealFFTTarget)); |