diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 13:44:13 +0100 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2021-11-16 14:06:30 +0100 |
commit | 39582115bc93b5435d25e56891815ae7cb1898fd (patch) | |
tree | 6c728e1125961fc04ba6f77bf9af637925825f5b /cuda/3d/par3d_fp.cu | |
parent | b492e3d049e300132d2f22eee7922ff308342a84 (diff) | |
download | astra-39582115bc93b5435d25e56891815ae7cb1898fd.tar.gz astra-39582115bc93b5435d25e56891815ae7cb1898fd.tar.bz2 astra-39582115bc93b5435d25e56891815ae7cb1898fd.tar.xz astra-39582115bc93b5435d25e56891815ae7cb1898fd.zip |
Remove cudaTextForceKernelsCompletion
Diffstat (limited to 'cuda/3d/par3d_fp.cu')
-rw-r--r-- | cuda/3d/par3d_fp.cu | 30 |
1 files changed, 14 insertions, 16 deletions
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index 1f58516..cf8336c 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -501,8 +501,8 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
 				dim3 dimGrid(
 					((dims.iProjU+g_detBlockU-1)/g_detBlockU)*((dims.iProjV+g_detBlockV-1)/g_detBlockV),
 					(blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock);
-				// TODO: check if we can't immediately
-				// destroy the stream after use
+				// TODO: consider limiting number of handle (chaotic) geoms
+				// with many alternating directions
 				cudaStream_t stream;
 				cudaStreamCreate(&stream);
 				streams.push_back(stream);
@@ -545,17 +545,16 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
 		}
 	}
 
-	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
-		cudaStreamDestroy(*iter);
-
-	streams.clear();
-
-	cudaTextForceKernelsCompletion();
+	bool ok = true;
+	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) {
+		ok &= checkCuda(cudaStreamSynchronize(*iter), "par3d_fp");
+		cudaStreamDestroy(*iter);
+	}
 
 
 	// printf("%f\n", toc(t));
 
-	return true;
+	return ok;
 }
 
 bool Par3DFP(cudaPitchedPtr D_volumeData,
@@ -726,17 +725,16 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
 		}
 	}
 
-	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
-		cudaStreamDestroy(*iter);
-
-	streams.clear();
-
-	cudaTextForceKernelsCompletion();
+	bool ok = true;
+	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) {
+		ok = ok &= checkCuda(cudaStreamSynchronize(*iter), "Par3DFP_SumSqW");
+		cudaStreamDestroy(*iter);
+	}
 
 
 	// printf("%f\n", toc(t));
 
-	return true;
+	return ok;
 }