summaryrefslogtreecommitdiffstats
path: root/cuda/3d/par3d_fp.cu
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2021-11-16 13:44:13 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2021-11-16 14:06:30 +0100
commit39582115bc93b5435d25e56891815ae7cb1898fd (patch)
tree6c728e1125961fc04ba6f77bf9af637925825f5b /cuda/3d/par3d_fp.cu
parentb492e3d049e300132d2f22eee7922ff308342a84 (diff)
downloadastra-39582115bc93b5435d25e56891815ae7cb1898fd.tar.gz
astra-39582115bc93b5435d25e56891815ae7cb1898fd.tar.bz2
astra-39582115bc93b5435d25e56891815ae7cb1898fd.tar.xz
astra-39582115bc93b5435d25e56891815ae7cb1898fd.zip
Remove cudaTextForceKernelsCompletion
Diffstat (limited to 'cuda/3d/par3d_fp.cu')
-rw-r--r--cuda/3d/par3d_fp.cu30
1 files changed, 14 insertions, 16 deletions
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index 1f58516..cf8336c 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -501,8 +501,8 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
dim3 dimGrid(
((dims.iProjU+g_detBlockU-1)/g_detBlockU)*((dims.iProjV+g_detBlockV-1)/g_detBlockV),
(blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock);
- // TODO: check if we can't immediately
- // destroy the stream after use
+ // TODO: consider limiting number of handle (chaotic) geoms
+ // with many alternating directions
cudaStream_t stream;
cudaStreamCreate(&stream);
streams.push_back(stream);
@@ -545,17 +545,16 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
}
}
- for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
- cudaStreamDestroy(*iter);
-
- streams.clear();
-
- cudaTextForceKernelsCompletion();
+ bool ok = true;
+ for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) {
+ ok &= checkCuda(cudaStreamSynchronize(*iter), "par3d_fp");
+ cudaStreamDestroy(*iter);
+ }
// printf("%f\n", toc(t));
- return true;
+ return ok;
}
bool Par3DFP(cudaPitchedPtr D_volumeData,
@@ -726,17 +725,16 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
}
}
- for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
- cudaStreamDestroy(*iter);
-
- streams.clear();
-
- cudaTextForceKernelsCompletion();
+ bool ok = true;
+ for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) {
+ ok = ok &= checkCuda(cudaStreamSynchronize(*iter), "Par3DFP_SumSqW");
+ cudaStreamDestroy(*iter);
+ }
// printf("%f\n", toc(t));
- return true;
+ return ok;
}