summary | refs | log | tree | commit | diff | stats
path: root/cuda/2d
diff options
context:
space:
mode:
author: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2021-11-16 12:19:07 +0100
committer: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2021-11-16 14:08:57 +0100
commit: 4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1 (patch)
tree: 05bf29ce288b24d52f2a40b40bc1b64ae75aeee0 /cuda/2d
parent: 7f5a50d5b142fe8aeea22754b9895d1fae25e662 (diff)
download: astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.tar.gz
astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.tar.bz2
astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.tar.xz
astra-4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1.zip
Remove unnecessary costly syncs in FFT
Diffstat (limited to 'cuda/2d')
-rw-r--r-- cuda/2d/fft.cu 10
1 files changed, 8 insertions, 2 deletions
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 413f3aa..e72ee85 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -232,7 +232,10 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;
float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount;
- SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+ if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) {
+ cudaFree(pfDevRealFFTSource);
+ return false;
+ }
}
bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,
@@ -274,7 +277,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;
float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch;
- SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+ if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) {
+ cudaFree(pfDevRealFFTTarget);
+ return false;
+ }
}
SAFE_CALL(cudaFree(pfDevRealFFTTarget));