summary | refs | log | tree | commit | diff | stats
path: root/cuda
diff options
context:
space:
mode:
Diffstat (limited to 'cuda')
-rw-r--r-- cuda/2d/arith.cu 299
-rw-r--r-- cuda/2d/arith.h 17
-rw-r--r-- cuda/2d/dims.h 9
3 files changed, 7 insertions, 318 deletions
diff --git a/cuda/2d/arith.cu b/cuda/2d/arith.cu
index 9544026..04d4de9 100644
--- a/cuda/2d/arith.cu
+++ b/cuda/2d/arith.cu
@@ -619,277 +619,6 @@ void processSino(float* out, const float* in1, const float* in2, float param, un
-template<typename op>
-void processVol3D(cudaPitchedPtr& out, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut = (float*)out.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devtoD<op, 32><<<gridSize, blockSize>>>(pfOut, out.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processVol3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut = (float*)out.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, fParam, out.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processVol3D(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut1 = (float*)out1.ptr;
- float *pfOut2 = (float*)out2.ptr;
- unsigned int step = out1.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devFFtoDD<op, 32><<<gridSize, blockSize>>>(pfOut1, pfOut2, fParam1, fParam2, out1.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut1 += step;
- pfOut2 += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-
-template<typename op>
-void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn = (float*)in.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, out.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut += step;
- pfIn += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn = (float*)in.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, fParam, out.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut += step;
- pfIn += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn1 = (float*)in1.ptr;
- float *pfIn2 = (float*)in2.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devDDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, fParam, out.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut += step;
- pfIn1 += step;
- pfIn2 += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn1 = (float*)in1.ptr;
- float *pfIn2 = (float*)in2.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iVolY;
-
- for (unsigned int i = 0; i < dims.iVolZ; ++i) {
- devDDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, out.pitch/sizeof(float), dims.iVolX, dims.iVolY);
- pfOut += step;
- pfIn1 += step;
- pfIn2 += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut = (float*)out.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devtoD<op, 32><<<gridSize, blockSize>>>(pfOut, out.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut = (float*)out.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, fParam, out.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut1 = (float*)out1.ptr;
- float *pfOut2 = (float*)out2.ptr;
- unsigned int step = out1.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devFFtoDD<op, 32><<<gridSize, blockSize>>>(pfOut1, pfOut2, fParam1, fParam2, out1.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut1 += step;
- pfOut2 += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn = (float*)in.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, out.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut += step;
- pfIn += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn = (float*)in.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn, fParam, out.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut += step;
- pfIn += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn1 = (float*)in1.ptr;
- float *pfIn2 = (float*)in2.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devDDFtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, fParam, out.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut += step;
- pfIn1 += step;
- pfIn2 += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-template<typename op>
-void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims)
-{
- dim3 blockSize(16,16);
- dim3 gridSize((dims.iProjU+15)/16, (dims.iProjAngles+511)/512);
- float *pfOut = (float*)out.ptr;
- float *pfIn1 = (float*)in1.ptr;
- float *pfIn2 = (float*)in2.ptr;
- unsigned int step = out.pitch/sizeof(float) * dims.iProjAngles;
-
- for (unsigned int i = 0; i < dims.iProjV; ++i) {
- devDDtoD<op, 32><<<gridSize, blockSize>>>(pfOut, pfIn1, pfIn2, out.pitch/sizeof(float), dims.iProjU, dims.iProjAngles);
- pfOut += step;
- pfIn1 += step;
- pfIn2 += step;
- }
-
- cudaTextForceKernelsCompletion();
-}
-
-
-
-
-
-
@@ -905,52 +634,38 @@ void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPit
#define INST_DFtoD(name) \
template void processVolCopy<name>(float* out, const float* in, float param, const SDimensions& dims); \
template void processVol<name>(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims);
+ template void processSino<name>(float* out, const float* in, float param, unsigned int pitch, const SDimensions& dims);
#define INST_DtoD(name) \
template void processVolCopy<name>(float* out, const float* in, const SDimensions& dims); \
template void processVol<name>(float* out, const float* in, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out, const float* in, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims);
+ template void processSino<name>(float* out, const float* in, unsigned int pitch, const SDimensions& dims);
#define INST_DDtoD(name) \
template void processVolCopy<name>(float* out, const float* in1, const float* in2, const SDimensions& dims); \
template void processVol<name>(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims);
+ template void processSino<name>(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims);
#define INST_DDFtoD(name) \
template void processVolCopy<name>(float* out, const float* in1, const float* in2, float fParam, const SDimensions& dims); \
template void processVol<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims);
+ template void processSino<name>(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims);
#define INST_toD(name) \
template void processVolCopy<name>(float* out, const SDimensions& dims); \
template void processVol<name>(float* out, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out, const SDimensions3D& dims);
+ template void processSino<name>(float* out, unsigned int pitch, const SDimensions& dims);
#define INST_FtoD(name) \
template void processVolCopy<name>(float* out, float param, const SDimensions& dims); \
template void processVol<name>(float* out, float param, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out, float param, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out, float param, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out, float param, const SDimensions3D& dims);
+ template void processSino<name>(float* out, float param, unsigned int pitch, const SDimensions& dims);
#define INST_FFtoDD(name) \
template void processVolCopy<name>(float* out1, float* out2, float fParam1, float fParam2, const SDimensions& dims); \
template void processVol<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims); \
- template void processSino<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims); \
- template void processVol3D<name>(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims); \
- template void processSino3D<name>(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims);
+ template void processSino<name>(float* out1, float* out2, float fParam1, float fParam2, unsigned int pitch, const SDimensions& dims);
diff --git a/cuda/2d/arith.h b/cuda/2d/arith.h
index c32a63a..f730e2f 100644
--- a/cuda/2d/arith.h
+++ b/cuda/2d/arith.h
@@ -79,23 +79,6 @@ template<typename op> void processSino(float* out, const float* in, float fParam
template<typename op> void processSino(float* out, const float* in1, const float* in2, float fParam, unsigned int pitch, const SDimensions& dims);
template<typename op> void processSino(float* out, const float* in1, const float* in2, unsigned int pitch, const SDimensions& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out, const SDimensions3D& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims);
-template<typename op> void processVol3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims);
-
-template<typename op> void processSino3D(cudaPitchedPtr& out, const SDimensions3D& dims);
-template<typename op> void processSino3D(cudaPitchedPtr& out, float fParam, const SDimensions3D& dims);
-template<typename op> void processSino3D(cudaPitchedPtr& out1, cudaPitchedPtr& out2, float fParam1, float fParam2, const SDimensions3D& dims);
-template<typename op> void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, const SDimensions3D& dims);
-template<typename op> void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in, float fParam, const SDimensions3D& dims);
-template<typename op> void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, float fParam, const SDimensions3D& dims);
-template<typename op> void processSino3D(cudaPitchedPtr& out, const cudaPitchedPtr& in1, const cudaPitchedPtr& in2, const SDimensions3D& dims);
-
-
}
diff --git a/cuda/2d/dims.h b/cuda/2d/dims.h
index 21ccb31..df349f7 100644
--- a/cuda/2d/dims.h
+++ b/cuda/2d/dims.h
@@ -53,15 +53,6 @@ struct SDimensions {
unsigned int iRaysPerPixelDim;
};
-struct SDimensions3D {
- unsigned int iVolX;
- unsigned int iVolY;
- unsigned int iVolZ;
- unsigned int iProjAngles;
- unsigned int iProjU; // number of detectors in the U direction
- unsigned int iProjV; // number of detectors in the V direction
-};
-
}
#endif