summaryrefslogtreecommitdiffstats
path: root/cuda/3d/darthelper3d.cu
diff options
context:
space:
mode:
Diffstat (limited to 'cuda/3d/darthelper3d.cu')
-rw-r--r--cuda/3d/darthelper3d.cu229
1 files changed, 229 insertions, 0 deletions
diff --git a/cuda/3d/darthelper3d.cu b/cuda/3d/darthelper3d.cu
new file mode 100644
index 0000000..68330a1
--- /dev/null
+++ b/cuda/3d/darthelper3d.cu
@@ -0,0 +1,229 @@
+/*
+-----------------------------------------------------------------------
+Copyright 2012 iMinds-Vision Lab, University of Antwerp
+
+Contact: astra@ua.ac.be
+Website: http://astra.ua.ac.be
+
+
+This file is part of the
+All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox").
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "util3d.h"
+#include "dims3d.h"
+#include "darthelper3d.h"
+#include <cassert>
+
+namespace astraCUDA3d {
+
+
+ // -------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ __global__ void devDartSmoothing(cudaPitchedPtr out, cudaPitchedPtr in, float b, SDimensions3D dims)
+ {
+ unsigned int x = threadIdx.x + 16*blockIdx.x;
+ unsigned int y = threadIdx.y + 16*blockIdx.y;
+
+ // Sacrifice the border pixels to simplify the implementation.
+ if (x > 0 && x < dims.iVolX - 1 && y > 0 && y < dims.iVolY - 1) {
+
+ float* d = (float*)in.ptr;
+ float* m = (float*)out.ptr;
+
+ unsigned int index;
+ unsigned int p = (out.pitch >> 2);
+
+ for (unsigned int z = 0; z <= dims.iVolZ-1; z++) {
+
+ float res = 0.0f;
+
+ // bottom slice
+ if (z > 0) {
+ index = ((z-1)*dims.iVolY + y) * p + x;
+ res += d[index-p-1] + d[index-p] + d[index-p+1] +
+ d[index -1] + d[index ] + d[index +1] +
+ d[index+p-1] + d[index+p] + d[index+p+1];
+ }
+
+ // top slice
+ if (z < dims.iVolZ-1) {
+ index = ((z+1)*dims.iVolY + y) * p + x;
+ res += d[index-p-1] + d[index-p] + d[index-p+1] +
+ d[index -1] + d[index ] + d[index +1] +
+ d[index+p-1] + d[index+p] + d[index+p+1];
+ }
+
+ // same slice
+ index = (z*dims.iVolY + y) * p + x;
+ res += d[index-p-1] + d[index-p] + d[index-p+1] +
+ d[index -1] + d[index +1] +
+ d[index+p-1] + d[index+p] + d[index+p+1];
+
+ // result
+ m[index] = (1.0f-b) * d[index] + b * 0.038461538f * res;
+
+ }
+
+ }
+ }
+
+ // -------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ void dartSmoothing(float* out, const float* in, float b, unsigned int radius, SDimensions3D dims)
+ {
+ cudaPitchedPtr D_inData;
+ D_inData = allocateVolumeData(dims);
+ copyVolumeToDevice(in, D_inData, dims);
+
+ cudaPitchedPtr D_outData;
+ D_outData = allocateVolumeData(dims);
+ copyVolumeToDevice(out, D_outData, dims);
+
+ dim3 blockSize(16,16);
+ dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+15)/16);
+
+ devDartSmoothing<<<gridSize, blockSize>>>(D_outData, D_inData, b, dims);
+
+ copyVolumeFromDevice(out, D_outData, dims);
+
+ cudaFree(D_outData.ptr);
+ cudaFree(D_inData.ptr);
+
+ }
+
+
+ // -------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ // CUDA function for the masking of DART with a radius == 1
+ __global__ void devDartMasking(cudaPitchedPtr mask, cudaPitchedPtr in, unsigned int conn, SDimensions3D dims)
+ {
+ unsigned int x = threadIdx.x + 16*blockIdx.x;
+ unsigned int y = threadIdx.y + 16*blockIdx.y;
+
+ // Sacrifice the border pixels to simplify the implementation.
+ if (x > 0 && x < dims.iVolX - 1 && y > 0 && y < dims.iVolY - 1) {
+
+ float* d = (float*)in.ptr;
+ float* m = (float*)mask.ptr;
+
+ unsigned int index;
+ unsigned int p = (in.pitch >> 2);
+
+ for (unsigned int z = 0; z <= dims.iVolZ-1; z++) {
+
+ unsigned int o2 = (z*dims.iVolY + y) * p + x;
+
+ m[o2] = 0.0f;
+
+ // bottom slice
+ if (z > 0) {
+ index = ((z-1)*dims.iVolY + y) * p + x;
+ if ((conn == 26 &&
+ (d[index-p-1] != d[o2] || d[index-p] != d[o2] || d[index-p+1] != d[o2] ||
+ d[index -1] != d[o2] || d[index ] != d[o2] || d[index +1] != d[o2] ||
+ d[index+p-1] != d[o2] || d[index+p] != d[o2] || d[index+p+1] != d[o2] ))
+ ||
+ (conn == 6 && d[index] != d[o2]))
+ {
+ m[o2] = 1.0f;
+ continue;
+ }
+ }
+
+ // top slice
+ if (z < dims.iVolZ-1) {
+ index = ((z+1)*dims.iVolY + y) * p + x;
+ if ((conn == 26 &&
+ (d[index-p-1] != d[o2] || d[index-p] != d[o2] || d[index-p+1] != d[o2] ||
+ d[index -1] != d[o2] || d[index ] != d[o2] || d[index +1] != d[o2] ||
+ d[index+p-1] != d[o2] || d[index+p] != d[o2] || d[index+p+1] != d[o2] ))
+ ||
+ (conn == 6 && d[index] != d[o2]))
+ {
+ m[o2] = 1.0f;
+ continue;
+ }
+ }
+
+ // other slices
+ index = (z*dims.iVolY + y) * p + x;
+ if ((conn == 26 &&
+ (d[index-p-1] != d[o2] || d[index-p] != d[o2] || d[index-p+1] != d[o2] ||
+ d[index -1] != d[o2] || d[index +1] != d[o2] ||
+ d[index+p-1] != d[o2] || d[index+p] != d[o2] || d[index+p+1] != d[o2] ))
+ ||
+ (conn == 6 &&
+ ( d[index-p] != d[o2] ||
+ d[index -1] != d[o2] || d[index +1] != d[o2] ||
+ d[index+p] != d[o2] )))
+ {
+ m[o2] = 1.0f;
+ continue;
+ }
+
+ }
+
+ }
+ }
+
+
+
+ // -------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ void dartMasking(float* mask, const float* segmentation, unsigned int conn, unsigned int radius, unsigned int threshold, SDimensions3D dims)
+ {
+ cudaPitchedPtr D_maskData;
+ D_maskData = allocateVolumeData(dims);
+ copyVolumeToDevice(mask, D_maskData, dims);
+
+ cudaPitchedPtr D_segmentationData;
+ D_segmentationData = allocateVolumeData(dims);
+ copyVolumeToDevice(segmentation, D_segmentationData, dims);
+
+ dim3 blockSize(16,16);
+ dim3 gridSize((dims.iVolX+15)/16, (dims.iVolY+15)/16);
+
+ if (threshold == 1 && radius == 1)
+ devDartMasking<<<gridSize, blockSize>>>(D_maskData, D_segmentationData, conn, dims);
+ //else if (threshold > 1 && radius == 1)
+ // devADartMask<<<gridSize, blockSize>>>(D_maskData, D_segmentationData, conn, threshold, pitch, width, height, 1, 1);
+ //else if (threshold == 1 && radius > 1)
+ // devDartMaskRadius<<<gridSize, blockSize>>>(D_maskData, D_segmentationData, conn, radius, pitch, width, height, 1, 1);
+ //else
+ // devADartMaskRadius<<<gridSize, blockSize>>>(D_maskData, D_segmentationData, conn, radius, threshold, pitch, width, height, 1, 1);
+
+ copyVolumeFromDevice(mask, D_maskData, dims);
+
+ cudaFree(D_maskData.ptr);
+ cudaFree(D_segmentationData.ptr);
+
+ }
+ // -------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+ bool setGPUIndex(int iGPUIndex)
+ {
+ cudaSetDevice(iGPUIndex);
+ cudaError_t err = cudaGetLastError();
+
+ // Ignore errors caused by calling cudaSetDevice multiple times
+ if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
+ return false;
+
+ return true;
+ }
+
+
+}