diff options
Diffstat (limited to 'cuda')
-rw-r--r-- | cuda/2d/astra.cu | 7 | ||||
-rw-r--r-- | cuda/2d/darthelper.cu | 13 | ||||
-rw-r--r-- | cuda/2d/fft.cu | 45 | ||||
-rw-r--r-- | cuda/2d/par_fp.cu | 2 | ||||
-rw-r--r-- | cuda/2d/util.cu | 8 | ||||
-rw-r--r-- | cuda/3d/cone_fp.cu | 2 | ||||
-rw-r--r-- | cuda/3d/par3d_fp.cu | 2 | ||||
-rw-r--r-- | cuda/3d/util3d.cu | 12 |
8 files changed, 47 insertions, 44 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index dc16a56..379c690 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -42,12 +42,13 @@ $Id$ #include <fstream> #include <cuda.h> -#include "../../include/astra/Logger.h" #include "../../include/astra/VolumeGeometry2D.h" #include "../../include/astra/ParallelProjectionGeometry2D.h" #include "../../include/astra/FanFlatProjectionGeometry2D.h" #include "../../include/astra/FanFlatVecProjectionGeometry2D.h" +#include "../../include/astra/Logging.h" + // For fan beam FBP weighting #include "../3d/fdk.h" @@ -538,7 +539,7 @@ bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; int iFilterShiftSize = _iFilterWidth / 2; - + for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) { int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; @@ -563,7 +564,7 @@ bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = } default: { - fprintf(stderr, "AstraFBP::setFilter: Unknown filter type requested"); + ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested"); delete [] pHostFilter; return false; } diff --git a/cuda/2d/darthelper.cu b/cuda/2d/darthelper.cu index 28ca557..1d10d49 100644 --- a/cuda/2d/darthelper.cu +++ b/cuda/2d/darthelper.cu @@ -57,7 +57,7 @@ void roiSelect(float* out, float radius, unsigned int width, unsigned int height // We abuse dims here... SDimensions dims; dims.iVolWidth = width; - dims.iVolHeight = width; + dims.iVolHeight = height; allocateVolumeData(D_data, pitch, dims); copyVolumeToDevice(out, width, dims, D_data, pitch); @@ -245,7 +245,7 @@ void dartMask(float* mask, const float* segmentation, unsigned int conn, unsigne // We abuse dims here... SDimensions dims; dims.iVolWidth = width; - dims.iVolHeight = width; + dims.iVolHeight = height; allocateVolumeData(D_segmentationData, pitch, dims); copyVolumeToDevice(segmentation, width, dims, D_segmentationData, pitch); @@ -278,7 +278,7 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns unsigned int x = threadIdx.x + 16*blockIdx.x; unsigned int y = threadIdx.y + 16*blockIdx.y; - // Sacrifice the border pixels to simplify the implementation. + // Sacrifice the border pixels to simplify the implementation. if (x > radius-1 && x < width - radius && y > radius-1 && y < height - radius) { float* d = (float*)in; @@ -286,9 +286,10 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns unsigned int o2 = y*pitch+x; int r = radius; + float count = 4*r*(r+1); float res = -d[o2]; - for (int row = -r; row < r; row++) + for (int row = -r; row <= r; row++) { unsigned int o1 = (y+row)*pitch+x; for (int col = -r; col <= r; col++) @@ -297,7 +298,7 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns } } - res *= b / 4*r*(r+1); + res *= b / count; res += (1.0f-b) * d[o2]; m[o2] = res; @@ -333,7 +334,7 @@ void dartSmoothing(float* out, const float* in, float b, unsigned int radius, un // We abuse dims here... SDimensions dims; dims.iVolWidth = width; - dims.iVolHeight = width; + dims.iVolHeight = height; allocateVolumeData(D_inData, pitch, dims); copyVolumeToDevice(in, width, dims, D_inData, pitch); diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index d105e29..2bfd493 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -34,7 +34,7 @@ $Id$ #include <cuda.h> #include <fstream> -#include "../../include/astra/Logger.h" +#include "../../include/astra/Logging.h" using namespace astra; @@ -43,25 +43,22 @@ using namespace astra; #define CHECK_ERROR(errorMessage) do { \ cudaError_t err = cudaThreadSynchronize(); \ if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ - CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \ + ASTRA_ERROR("Cuda error %s : %s", \ + errorMessage,cudaGetErrorString( err)); \ exit(EXIT_FAILURE); \ } } while (0) #define SAFE_CALL( call) do { \ cudaError err = call; \ if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString( err) ); \ - CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \ + ASTRA_ERROR("Cuda error: %s ", \ + cudaGetErrorString( err)); \ exit(EXIT_FAILURE); \ } \ err = cudaThreadSynchronize(); \ if( cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ - __FILE__, __LINE__, cudaGetErrorString( err) ); \ - CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \ + ASTRA_ERROR("Cuda error: %s : ", \ + cudaGetErrorString( err)); \ exit(EXIT_FAILURE); \ } } while (0) @@ -140,7 +137,7 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount); if(result != CUFFT_SUCCESS) { - std::cerr << "Failed to plan 1d r2c fft" << std::endl; + ASTRA_ERROR("Failed to plan 1d r2c fft"); return false; } @@ -149,7 +146,7 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount, if(result != CUFFT_SUCCESS) { - std::cerr << "Failed to exec 1d r2c fft" << std::endl; + ASTRA_ERROR("Failed to exec 1d r2c fft"); return false; } @@ -166,18 +163,18 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount, result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount); if(result != CUFFT_SUCCESS) { - std::cerr << "Failed to plan 1d c2r fft" << std::endl; + ASTRA_ERROR("Failed to plan 1d c2r fft"); return false; } // todo: why do we have to get rid of the const qualifier? result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex, - (cufftReal *)_pfDevTarget); + (cufftReal *)_pfDevTarget); cufftDestroy(plan); if(result != CUFFT_SUCCESS) { - std::cerr << "Failed to exec 1d c2r fft" << std::endl; + ASTRA_ERROR("Failed to exec 1d c2r fft"); return false; } @@ -257,7 +254,7 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, } rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount, - pfDevRealFFTTarget); + pfDevRealFFTTarget); SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch)); @@ -460,7 +457,7 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, const float fA1 = 0.48f; const float fA2 = 0.38f; float fNMinusOne = (float)(_iFFTFourierDetectorCount) - 1.0f; - + for(int iDetectorIndex = 1; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++) { float fSmallN = (float)iDetectorIndex; @@ -633,7 +630,7 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, } default: { - std::cerr << "Cannot serve requested filter" << std::endl; + ASTRA_ERROR("Cannot serve requested filter"); } } @@ -746,7 +743,7 @@ void testCudaFFT() { for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++) { -// int +// int // pfHostProj[iIndex] = (float)rand() / (float)RAND_MAX; } @@ -767,13 +764,13 @@ void testCudaFFT() result = cufftPlan1d(&plan, iDetectorCount, CUFFT_R2C, iProjectionCount); if(result != CUFFT_SUCCESS) { - cerr << "Failed to plan 1d r2c fft" << endl; + ASTRA_ERROR("Failed to plan 1d r2c fft"); } result = cufftExecR2C(plan, pfDevProj, pDevFourProj); if(result != CUFFT_SUCCESS) { - cerr << "Failed to exec 1d r2c fft" << endl; + ASTRA_ERROR("Failed to exec 1d r2c fft"); } cufftDestroy(plan); @@ -787,7 +784,7 @@ void testCudaFFT() float * pfHostFourProjImaginary = new float[iTotalElementCount]; convertComplexToRealImg(pHostFourProj, iTotalElementCount, pfHostFourProjReal, pfHostFourProjImaginary); - + writeToMatlabFile("proj_four_real.mat", pfHostFourProjReal, iProjectionCount, iDetectorCount); writeToMatlabFile("proj_four_imaginary.mat", pfHostFourProjImaginary, iProjectionCount, iDetectorCount); @@ -797,13 +794,13 @@ void testCudaFFT() result = cufftPlan1d(&plan, iDetectorCount, CUFFT_C2R, iProjectionCount); if(result != CUFFT_SUCCESS) { - cerr << "Failed to plan 1d c2r fft" << endl; + ASTRA_ERROR("Failed to plan 1d c2r fft"); } result = cufftExecC2R(plan, pDevFourProj, pfDevInFourProj); if(result != CUFFT_SUCCESS) { - cerr << "Failed to exec 1d c2r fft" << endl; + ASTRA_ERROR("Failed to exec 1d c2r fft"); } cufftDestroy(plan); diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu index d0ca7ff..bb8b909 100644 --- a/cuda/2d/par_fp.cu +++ b/cuda/2d/par_fp.cu @@ -487,7 +487,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch, unsigned int blockEnd = 0; bool blockVertical = false; for (unsigned int a = 0; a <= dims.iProjAngles; ++a) { - bool vertical; + bool vertical = false; // TODO: Having <= instead of < below causes a 5% speedup. // Maybe we should detect corner cases and put them in the optimal // group of angles. diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu index 81e368f..a4f8f3e 100644 --- a/cuda/2d/util.cu +++ b/cuda/2d/util.cu @@ -30,6 +30,8 @@ $Id$ #include <cassert> #include "util.h" +#include "../../include/astra/Logging.h" + namespace astraCUDA { bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch, @@ -91,7 +93,7 @@ bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsign cudaError_t ret = cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height); if (ret != cudaSuccess) { reportCudaError(ret); - fprintf(stderr, "Failed to allocate %dx%d GPU buffer\n", width, height); + ASTRA_ERROR("Failed to allocate %dx%d GPU buffer", width, height); return false; } @@ -259,7 +261,7 @@ bool cudaTextForceKernelsCompletion() cudaError_t returnedCudaError = cudaThreadSynchronize(); if(returnedCudaError != cudaSuccess) { - fprintf(stderr, "Failed to force completion of cuda kernels: %d: %s.\n", returnedCudaError, cudaGetErrorString(returnedCudaError)); + ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError)); return false; } @@ -269,7 +271,7 @@ bool cudaTextForceKernelsCompletion() void reportCudaError(cudaError_t err) { if(err != cudaSuccess) - fprintf(stderr, "CUDA error %d: %s.\n", err, cudaGetErrorString(err)); + ASTRA_ERROR("CUDA error %d: %s.", err, cudaGetErrorString(err)); } diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index bda71ba..b36d2bc 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -340,7 +340,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, // tic(t); for (unsigned int a = 0; a <= angleCount; ++a) { - int dir; + int dir = -1; if (a != angleCount) { float dX = fabsf(angles[a].fSrcX - (angles[a].fDetSX + dims.iProjU*angles[a].fDetUX*0.5f + dims.iProjV*angles[a].fDetVX*0.5f)); float dY = fabsf(angles[a].fSrcY - (angles[a].fDetSY + dims.iProjU*angles[a].fDetUY*0.5f + dims.iProjV*angles[a].fDetVY*0.5f)); diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 8d44540..b14c494 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -440,7 +440,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, // tic(t); for (unsigned int a = 0; a <= angleCount; ++a) { - int dir; + int dir = -1; if (a != dims.iProjAngles) { float dX = fabsf(angles[a].fRayX); float dY = fabsf(angles[a].fRayY); diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu index d85a928..537ed69 100644 --- a/cuda/3d/util3d.cu +++ b/cuda/3d/util3d.cu @@ -31,6 +31,8 @@ $Id$ #include "util3d.h" #include "../2d/util.h" +#include "../../include/astra/Logging.h" + namespace astraCUDA3d { @@ -46,7 +48,7 @@ cudaPitchedPtr allocateVolumeData(const SDimensions3D& dims) cudaError err = cudaMalloc3D(&volData, extentV); if (err != cudaSuccess) { astraCUDA::reportCudaError(err); - fprintf(stderr, "Failed to allocate %dx%dx%d GPU buffer\n", dims.iVolX, dims.iVolY, dims.iVolZ); + ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iVolX, dims.iVolY, dims.iVolZ); volData.ptr = 0; // TODO: return 0 somehow? } @@ -65,7 +67,7 @@ cudaPitchedPtr allocateProjectionData(const SDimensions3D& dims) cudaError err = cudaMalloc3D(&projData, extentP); if (err != cudaSuccess) { astraCUDA::reportCudaError(err); - fprintf(stderr, "Failed to allocate %dx%dx%d GPU buffer\n", dims.iProjU, dims.iProjAngles, dims.iProjV); + ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iProjU, dims.iProjAngles, dims.iProjV); projData.ptr = 0; // TODO: return 0 somehow? } @@ -303,7 +305,7 @@ cudaArray* allocateVolumeArray(const SDimensions3D& dims) cudaError err = cudaMalloc3DArray(&cuArray, &channelDesc, extentA); if (err != cudaSuccess) { astraCUDA::reportCudaError(err); - fprintf(stderr, "Failed to allocate %dx%dx%d GPU array\n", dims.iVolX, dims.iVolY, dims.iVolZ); + ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iVolX, dims.iVolY, dims.iVolZ); return 0; } @@ -321,7 +323,7 @@ cudaArray* allocateProjectionArray(const SDimensions3D& dims) if (err != cudaSuccess) { astraCUDA::reportCudaError(err); - fprintf(stderr, "Failed to allocate %dx%dx%d GPU array\n", dims.iProjU, dims.iProjAngles, dims.iProjV); + ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iProjU, dims.iProjAngles, dims.iProjV); return 0; } @@ -397,7 +399,7 @@ bool cudaTextForceKernelsCompletion() cudaError_t returnedCudaError = cudaThreadSynchronize(); if(returnedCudaError != cudaSuccess) { - fprintf(stderr, "Failed to force completion of cuda kernels: %d: %s.\n", returnedCudaError, cudaGetErrorString(returnedCudaError)); + ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError)); return false; } |