/*
-----------------------------------------------------------------------
Copyright: 2010-2018, imec Vision Lab, University of Antwerp
2014-2018, CWI, Amsterdam
Contact: astra@astra-toolbox.com
Website: http://www.astra-toolbox.com/
This file is part of the ASTRA Toolbox.
The ASTRA Toolbox is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
The ASTRA Toolbox is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the ASTRA Toolbox. If not, see .
-----------------------------------------------------------------------
*/
#include "astra/cuda/2d/fft.h"
#include "astra/cuda/2d/util.h"
#include "astra/Logging.h"
#include "astra/Fourier.h"
#include
#include
#include
#include
using namespace astra;
// TODO: evaluate what we want to do in these situations:
#define CHECK_ERROR(errorMessage) do { \
cudaError_t err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
ASTRA_ERROR("Cuda error %s : %s", \
errorMessage,cudaGetErrorString( err)); \
exit(EXIT_FAILURE); \
} } while (0)
#define SAFE_CALL( call) do { \
cudaError err = call; \
if( cudaSuccess != err) { \
ASTRA_ERROR("Cuda error: %s ", \
cudaGetErrorString( err)); \
exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
ASTRA_ERROR("Cuda error: %s : ", \
cudaGetErrorString( err)); \
exit(EXIT_FAILURE); \
} } while (0)
namespace astraCUDA {
__global__ static void applyFilter_kernel(int _iProjectionCount,
int _iFreqBinCount,
cufftComplex * _pSinogram,
cufftComplex * _pFilter)
{
int iIndex = threadIdx.x + blockIdx.x * blockDim.x;
int iProjectionIndex = iIndex / _iFreqBinCount;
if(iProjectionIndex >= _iProjectionCount)
{
return;
}
float fA = _pSinogram[iIndex].x;
float fB = _pSinogram[iIndex].y;
float fC = _pFilter[iIndex].x;
float fD = _pFilter[iIndex].y;
_pSinogram[iIndex].x = fA * fC - fB * fD;
_pSinogram[iIndex].y = fA * fD + fC * fB;
}
__global__ static void rescaleInverseFourier_kernel(int _iProjectionCount,
int _iDetectorCount,
float* _pfInFourierOutput)
{
int iIndex = threadIdx.x + blockIdx.x * blockDim.x;
int iProjectionIndex = iIndex / _iDetectorCount;
int iDetectorIndex = iIndex % _iDetectorCount;
if(iProjectionIndex >= _iProjectionCount)
{
return;
}
_pfInFourierOutput[iProjectionIndex * _iDetectorCount + iDetectorIndex] /= (float)_iDetectorCount;
}
static void rescaleInverseFourier(int _iProjectionCount, int _iDetectorCount,
float * _pfInFourierOutput)
{
const int iBlockSize = 256;
int iElementCount = _iProjectionCount * _iDetectorCount;
int iBlockCount = (iElementCount + iBlockSize - 1) / iBlockSize;
rescaleInverseFourier_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,
_iDetectorCount,
_pfInFourierOutput);
CHECK_ERROR("rescaleInverseFourier_kernel failed");
}
void applyFilter(int _iProjectionCount, int _iFreqBinCount,
cufftComplex * _pSinogram, cufftComplex * _pFilter)
{
const int iBlockSize = 256;
int iElementCount = _iProjectionCount * _iFreqBinCount;
int iBlockCount = (iElementCount + iBlockSize - 1) / iBlockSize;
applyFilter_kernel<<< iBlockCount, iBlockSize >>>(_iProjectionCount,
_iFreqBinCount,
_pSinogram, _pFilter);
CHECK_ERROR("applyFilter_kernel failed");
}
static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,
const float * _pfDevSource,
cufftComplex * _pDevTargetComplex)
{
cufftHandle plan;
cufftResult result;
result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount);
if(result != CUFFT_SUCCESS)
{
ASTRA_ERROR("Failed to plan 1d r2c fft");
return false;
}
result = cufftExecR2C(plan, (cufftReal *)_pfDevSource, _pDevTargetComplex);
cufftDestroy(plan);
if(result != CUFFT_SUCCESS)
{
ASTRA_ERROR("Failed to exec 1d r2c fft");
return false;
}
return true;
}
static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount,
const cufftComplex * _pDevSourceComplex,
float * _pfDevTarget)
{
cufftHandle plan;
cufftResult result;
result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount);
if(result != CUFFT_SUCCESS)
{
ASTRA_ERROR("Failed to plan 1d c2r fft");
return false;
}
// todo: why do we have to get rid of the const qualifier?
result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex,
(cufftReal *)_pfDevTarget);
cufftDestroy(plan);
if(result != CUFFT_SUCCESS)
{
ASTRA_ERROR("Failed to exec 1d c2r fft");
return false;
}
return true;
}
bool allocateComplexOnDevice(int _iProjectionCount, int _iDetectorCount,
cufftComplex ** _ppDevComplex)
{
size_t bufferSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount;
SAFE_CALL(cudaMalloc((void **)_ppDevComplex, bufferSize));
return true;
}
bool freeComplexOnDevice(cufftComplex * _pDevComplex)
{
SAFE_CALL(cudaFree(_pDevComplex));
return true;
}
bool uploadComplexArrayToDevice(int _iProjectionCount, int _iDetectorCount,
cufftComplex * _pHostComplexSource,
cufftComplex * _pDevComplexTarget)
{
size_t memSize = sizeof(cufftComplex) * _iProjectionCount * _iDetectorCount;
SAFE_CALL(cudaMemcpy(_pDevComplexTarget, _pHostComplexSource, memSize, cudaMemcpyHostToDevice));
return true;
}
bool runCudaFFT(int _iProjectionCount, const float * _pfDevRealSource,
int _iSourcePitch, int _iProjDets,
int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount,
cufftComplex * _pDevTargetComplex)
{
float * pfDevRealFFTSource = NULL;
size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount;
SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTSource, bufferMemSize));
SAFE_CALL(cudaMemset(pfDevRealFFTSource, 0, bufferMemSize));
for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)
{
const float * pfSourceLocation = _pfDevRealSource + iProjectionIndex * _iSourcePitch;
float * pfTargetLocation = pfDevRealFFTSource + iProjectionIndex * _iFFTRealDetectorCount;
SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
}
bool bResult = invokeCudaFFT(_iProjectionCount, _iFFTRealDetectorCount,
pfDevRealFFTSource, _pDevTargetComplex);
if(!bResult)
{
return false;
}
SAFE_CALL(cudaFree(pfDevRealFFTSource));
return true;
}
bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
float * _pfRealTarget,
int _iTargetPitch, int _iProjDets,
int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount)
{
float * pfDevRealFFTTarget = NULL;
size_t bufferMemSize = sizeof(float) * _iProjectionCount * _iFFTRealDetectorCount;
SAFE_CALL(cudaMalloc((void **)&pfDevRealFFTTarget, bufferMemSize));
bool bResult = invokeCudaIFFT(_iProjectionCount, _iFFTRealDetectorCount,
_pDevSourceComplex, pfDevRealFFTTarget);
if(!bResult)
{
return false;
}
rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount,
pfDevRealFFTTarget);
SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch));
for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)
{
const float * pfSourceLocation = pfDevRealFFTTarget + iProjectionIndex * _iFFTRealDetectorCount;
float* pfTargetLocation = _pfRealTarget + iProjectionIndex * _iTargetPitch;
SAFE_CALL(cudaMemcpy(pfTargetLocation, pfSourceLocation, sizeof(float) * _iProjDets, cudaMemcpyDeviceToDevice));
}
SAFE_CALL(cudaFree(pfDevRealFFTTarget));
return true;
}
void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,
int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount)
{
for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)
{
for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
{
int iIndex = iDetectorIndex + iProjectionIndex * _iFFTFourierDetectorCount;
_pFilter[iIndex].x = 1.0f;
_pFilter[iIndex].y = 0.0f;
}
}
}
void genCuFFTFilter(const SFilterConfig &_cfg, int _iProjectionCount,
cufftComplex * _pFilter, int _iFFTRealDetectorCount,
int _iFFTFourierDetectorCount)
{
float * pfFilt = astra::genFilter(_cfg,
_iFFTRealDetectorCount,
_iFFTFourierDetectorCount);
for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
{
float fFilterValue = pfFilt[iDetectorIndex];
for(int iProjectionIndex = 0; iProjectionIndex < _iProjectionCount; iProjectionIndex++)
{
int iIndex = iDetectorIndex + iProjectionIndex * _iFFTFourierDetectorCount;
_pFilter[iIndex].x = fFilterValue;
_pFilter[iIndex].y = 0.0f;
}
}
delete[] pfFilt;
}
}