From 4ee4f090a8eb6dc3dd9ee4be254d70a2e7f213f1 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Tue, 16 Nov 2021 12:19:07 +0100
Subject: Remove unnecessary costly syncs in FFT

---
 cuda/2d/fft.cu | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'cuda')

diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 413f3aa..e72ee85 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -232,7 +232,10 @@ bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
 		const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;
 		float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount;
 
-		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaFFT memcpy")) {
+			cudaFree(pfDevRealFFTSource);
+			return false;
+		}
 	}
 
 	bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,
@@ -274,7 +277,10 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
 		const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;
 		float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch;
 
-		SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
+		if (!checkCuda(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice), "runCudaIFFT memcpy")) {
+			cudaFree(pfDevRealFFTTarget);
+			return false;
+		}
 	}
 
 	SAFE_CALL(cudaFree(pfDevRealFFTTarget));
-- 
cgit v1.2.3