diff options
| author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2016-07-29 12:03:38 +0200 | 
|---|---|---|
| committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2016-07-29 12:11:52 +0200 | 
| commit | b1ffc11d930c19bd73af9837a08bc8dde9fe8e72 (patch) | |
| tree | 40ce622ffdc8d75c885135db1ce4773c9670e2c3 | |
| parent | b2611a03577c285ddf48edab0d05dafa09ab362c (diff) | |
| download | astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.tar.gz astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.tar.bz2 astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.tar.xz astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.zip | |
Add CUDA parvec support
36 files changed, 1179 insertions, 1715 deletions
| diff --git a/astra_vc09.vcproj b/astra_vc09.vcproj index f2cf62a..428bf8d 100644 --- a/astra_vc09.vcproj +++ b/astra_vc09.vcproj @@ -1077,6 +1077,10 @@  					>  				</File>  				<File +					RelativePath=".\include\astra\ParallelVecProjectionGeometry2D.h" +					> +				</File> +				<File  					RelativePath=".\include\astra\ParallelVecProjectionGeometry3D.h"  					>  				</File> @@ -1121,6 +1125,10 @@  					>  				</File>  				<File +					RelativePath=".\src\GeometryUtil2D.cpp" +					> +				</File> +				<File  					RelativePath=".\src\GeometryUtil3D.cpp"  					>  				</File> @@ -1133,6 +1141,10 @@  					>  				</File>  				<File +					RelativePath=".\src\ParallelVecProjectionGeometry2D.cpp" +					> +				</File> +				<File  					RelativePath=".\src\ParallelVecProjectionGeometry3D.cpp"  					>  				</File> @@ -2185,6 +2197,10 @@  					>  				</File>  				<File +					RelativePath=".\cuda\2d\fbp.h" +					> +				</File> +				<File  					RelativePath=".\cuda\2d\fft.h"  					>  				</File> @@ -2557,6 +2573,42 @@  					</FileConfiguration>  				</File>  				<File +					RelativePath=".\cuda\2d\fbp.cu" +					> +					<FileConfiguration +						Name="Debug|Win32" +						ExcludedFromBuild="true" +						> +						<Tool +							Name="Cudart Build Rule" +						/> +					</FileConfiguration> +					<FileConfiguration +						Name="Debug|x64" +						ExcludedFromBuild="true" +						> +						<Tool +							Name="Cudart Build Rule" +						/> +					</FileConfiguration> +					<FileConfiguration +						Name="Release|Win32" +						ExcludedFromBuild="true" +						> +						<Tool +							Name="Cudart Build Rule" +						/> +					</FileConfiguration> +					<FileConfiguration +						Name="Release|x64" +						ExcludedFromBuild="true" +						> +						<Tool +							Name="Cudart Build Rule" +						/> +					</FileConfiguration> +				</File> +				<File  					RelativePath=".\cuda\2d\fft.cu"  					>  					<FileConfiguration diff --git a/astra_vc11.vcxproj b/astra_vc11.vcxproj index cd73076..d48796b 100644 --- a/astra_vc11.vcxproj +++ b/astra_vc11.vcxproj @@ -529,6 +529,7 @@      <ClCompile Include="src\Float32VolumeData3DMemory.cpp" />      <ClCompile Include="src\ForwardProjectionAlgorithm.cpp" />      <ClCompile Include="src\Fourier.cpp" /> +    <ClCompile Include="src\GeometryUtil2D.cpp" />      <ClCompile Include="src\GeometryUtil3D.cpp" />      <ClCompile Include="src\Globals.cpp" />      <ClCompile Include="src\Logging.cpp" /> @@ -569,6 +570,7 @@      <ClInclude Include="cuda\2d\em.h" />      <ClInclude Include="cuda\2d\fan_bp.h" />      <ClInclude Include="cuda\2d\fan_fp.h" /> +    <ClInclude Include="cuda\2d\fbp.h" />      <ClInclude Include="cuda\2d\fbp_filters.h" />      <ClInclude Include="cuda\2d\fft.h" />      <ClInclude Include="cuda\2d\par_bp.h" /> @@ -652,8 +654,8 @@      <ClInclude Include="include\astra\ParallelBeamStripKernelProjector2D.h" />      <ClInclude Include="include\astra\ParallelProjectionGeometry2D.h" />      <ClInclude Include="include\astra\ParallelProjectionGeometry3D.h" /> -    <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h" />      <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h" /> +    <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h" />      <ClInclude Include="include\astra\PlatformDepSystemCode.h" />      <ClInclude Include="include\astra\PluginAlgorithm.h" />      <ClInclude Include="include\astra\ProjectionGeometry2D.h" /> @@ -727,6 +729,12 @@        <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>        <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>      </CudaCompile> +    <CudaCompile Include="cuda\2d\fbp.cu"> +      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> +      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> +      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> +      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild> +    </CudaCompile>      <CudaCompile Include="cuda\2d\fft.cu">        <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>        <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> diff --git a/astra_vc11.vcxproj.filters b/astra_vc11.vcxproj.filters index 4143cd9..d6748f1 100644 --- a/astra_vc11.vcxproj.filters +++ b/astra_vc11.vcxproj.filters @@ -25,6 +25,9 @@      <CudaCompile Include="cuda\2d\fan_fp.cu">        <Filter>CUDA\cuda source</Filter>      </CudaCompile> +    <CudaCompile Include="cuda\2d\fbp.cu"> +      <Filter>CUDA\cuda source</Filter> +    </CudaCompile>      <CudaCompile Include="cuda\2d\fft.cu">        <Filter>CUDA\cuda source</Filter>      </CudaCompile> @@ -198,6 +201,9 @@      <ClCompile Include="src\FanFlatVecProjectionGeometry2D.cpp">        <Filter>Geometries\source</Filter>      </ClCompile> +    <ClCompile Include="src\GeometryUtil2D.cpp"> +      <Filter>Geometries\source</Filter> +    </ClCompile>      <ClCompile Include="src\GeometryUtil3D.cpp">        <Filter>Geometries\source</Filter>      </ClCompile> @@ -207,6 +213,9 @@      <ClCompile Include="src\ParallelProjectionGeometry3D.cpp">        <Filter>Geometries\source</Filter>      </ClCompile> +    <ClCompile Include="src\ParallelVecProjectionGeometry2D.cpp"> +      <Filter>Geometries\source</Filter> +    </ClCompile>      <ClCompile Include="src\ParallelVecProjectionGeometry3D.cpp">        <Filter>Geometries\source</Filter>      </ClCompile> @@ -321,10 +330,6 @@      <ClCompile Include="src\CudaSirtAlgorithm3D.cpp">        <Filter>CUDA\astra source</Filter>      </ClCompile> -    <ClCompile Include="src\GeometryUtil3D.cpp" /> -    <ClCompile Include="src\ParallelVecProjectionGeometry2D.cpp"> -      <Filter>Geometries\source</Filter> -    </ClCompile>    </ItemGroup>    <ItemGroup>      <ClInclude Include="include\astra\Algorithm.h"> @@ -474,6 +479,9 @@      <ClInclude Include="include\astra\ParallelProjectionGeometry3D.h">        <Filter>Geometries\headers</Filter>      </ClInclude> +    <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h"> +      <Filter>Geometries\headers</Filter> +    </ClInclude>      <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h">        <Filter>Geometries\headers</Filter>      </ClInclude> @@ -615,6 +623,9 @@      <ClInclude Include="cuda\2d\fbp_filters.h">        <Filter>CUDA\cuda headers</Filter>      </ClInclude> +    <ClInclude Include="cuda\2d\fbp.h"> +      <Filter>CUDA\cuda headers</Filter> +    </ClInclude>      <ClInclude Include="cuda\2d\fft.h">        <Filter>CUDA\cuda headers</Filter>      </ClInclude> @@ -675,11 +686,6 @@      <ClInclude Include="cuda\3d\util3d.h">        <Filter>CUDA\cuda headers</Filter>      </ClInclude> -    <ClInclude Include="include\astra\GeometryUtil2D.h" /> -    <ClInclude Include="include\astra\GeometryUtil3D.h" /> -    <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h"> -      <Filter>Geometries\headers</Filter> -    </ClInclude>    </ItemGroup>    <ItemGroup>      <None Include="include\astra\DataProjectorPolicies.inl"> diff --git a/build/linux/Makefile.in b/build/linux/Makefile.in index c72e20f..2af705f 100644 --- a/build/linux/Makefile.in +++ b/build/linux/Makefile.in @@ -139,6 +139,7 @@ BASE_OBJECTS=\  	src/Float32VolumeData3DMemory.lo \  	src/ForwardProjectionAlgorithm.lo \  	src/Fourier.lo \ +	src/GeometryUtil2D.lo \  	src/GeometryUtil3D.lo \  	src/Globals.lo \  	src/Logging.lo \ @@ -197,6 +198,7 @@ CUDA_OBJECTS=\  	cuda/2d/par_bp.lo \  	cuda/2d/fan_fp.lo \  	cuda/2d/fan_bp.lo \ +	cuda/2d/fbp.lo \  	cuda/2d/sirt.lo \  	cuda/2d/sart.lo \  	cuda/2d/cgls.lo \ diff --git a/build/msvc/gen.py b/build/msvc/gen.py index cc69a62..57f4539 100644 --- a/build/msvc/gen.py +++ b/build/msvc/gen.py @@ -156,6 +156,7 @@ P_astra["filters"]["CUDA\\cuda source"] = [  "cuda\\2d\\em.cu",  "cuda\\2d\\fan_bp.cu",  "cuda\\2d\\fan_fp.cu", +"cuda\\2d\\fbp.cu",  "cuda\\2d\\fft.cu",  "cuda\\2d\\par_bp.cu",  "cuda\\2d\\par_fp.cu", @@ -225,9 +226,11 @@ P_astra["filters"]["Geometries\\source"] = [  "src\\ConeVecProjectionGeometry3D.cpp",  "src\\FanFlatProjectionGeometry2D.cpp",  "src\\FanFlatVecProjectionGeometry2D.cpp", +"src\\GeometryUtil2D.cpp",  "src\\GeometryUtil3D.cpp",  "src\\ParallelProjectionGeometry2D.cpp",  "src\\ParallelProjectionGeometry3D.cpp", +"src\\ParallelVecProjectionGeometry2D.cpp",  "src\\ParallelVecProjectionGeometry3D.cpp",  "src\\ProjectionGeometry2D.cpp",  "src\\ProjectionGeometry3D.cpp", @@ -285,6 +288,7 @@ P_astra["filters"]["CUDA\\cuda headers"] = [  "cuda\\2d\\fan_bp.h",  "cuda\\2d\\fan_fp.h",  "cuda\\2d\\fbp_filters.h", +"cuda\\2d\\fbp.h",  "cuda\\2d\\fft.h",  "cuda\\2d\\par_bp.h",  "cuda\\2d\\par_fp.h", @@ -366,6 +370,7 @@ P_astra["filters"]["Geometries\\headers"] = [  "include\\astra\\GeometryUtil3D.h",  "include\\astra\\ParallelProjectionGeometry2D.h",  "include\\astra\\ParallelProjectionGeometry3D.h", +"include\\astra\\ParallelVecProjectionGeometry2D.h",  "include\\astra\\ParallelVecProjectionGeometry3D.h",  "include\\astra\\ProjectionGeometry2D.h",  "include\\astra\\ProjectionGeometry3D.h", diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index dc74e51..bde4f42 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -35,13 +35,13 @@ $Id$  #include "fan_bp.h"  #include "util.h"  #include "arith.h" +#include "astra.h"  namespace astraCUDA {  ReconAlgo::ReconAlgo()  { -	angles = 0; -	TOffsets = 0; +	parProjs = 0;  	fanProjs = 0;  	shouldAbort = false; @@ -66,8 +66,7 @@ ReconAlgo::~ReconAlgo()  void ReconAlgo::reset()  { -	delete[] angles; -	delete[] TOffsets; +	delete[] parProjs;  	delete[] fanProjs;  	if (freeGPUMemory) { @@ -77,8 +76,7 @@ void ReconAlgo::reset()  		cudaFree(D_volumeData);  	} -	angles = 0; -	TOffsets = 0; +	parProjs = 0;  	fanProjs = 0;  	shouldAbort = false; @@ -124,46 +122,40 @@ bool ReconAlgo::enableSinogramMask()  } -bool ReconAlgo::setGeometry(const SDimensions& _dims, const float* _angles) +bool ReconAlgo::setGeometry(const astra::CVolumeGeometry2D* pVolGeom, +                            const astra::CProjectionGeometry2D* pProjGeom)  { -	dims = _dims; +	bool ok; -	angles = new float[dims.iProjAngles]; +	ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); -	memcpy(angles, _angles, sizeof(angles[0]) * dims.iProjAngles); +	if (!ok) +		return false; +	delete[] parProjs; +	parProjs = 0;  	delete[] fanProjs;  	fanProjs = 0; -	return true; -} - -bool ReconAlgo::setFanGeometry(const SDimensions& _dims, -                               const SFanProjection* _projs) -{ -	dims = _dims; -	fanProjs = new SFanProjection[dims.iProjAngles]; - -	memcpy(fanProjs, _projs, sizeof(fanProjs[0]) * dims.iProjAngles); - -	delete[] angles; -	angles = 0; +	fOutputScale = 1.0f; +	ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fOutputScale); +	if (!ok) +		return false;  	return true;  } - -bool ReconAlgo::setTOffsets(const float* _TOffsets) +bool ReconAlgo::setSuperSampling(int raysPerDet, int raysPerPixelDim)  { -	// TODO: determine if they're all zero? -	TOffsets = new float[dims.iProjAngles]; -	memcpy(TOffsets, _TOffsets, sizeof(angles[0]) * dims.iProjAngles); +	if (raysPerDet <= 0 || raysPerPixelDim <= 0) +		return false; + +	dims.iRaysPerDet = raysPerDet; +	dims.iRaysPerPixelDim = raysPerPixelDim;  	return true;  } - -  bool ReconAlgo::setVolumeMask(float* _D_maskData, unsigned int _maskPitch)  {  	assert(useVolumeMask); @@ -324,14 +316,14 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch,                         float outputScale)  { -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return FP(D_volumeData, volumePitch, D_projData, projPitch, -		          dims, angles, TOffsets, outputScale); +		          dims, parProjs, fOutputScale * outputScale);  	} else {  		assert(fanProjs);  		return FanFP(D_volumeData, volumePitch, D_projData, projPitch, -		             dims, fanProjs, outputScale); +		             dims, fanProjs, fOutputScale * outputScale);  	}  } @@ -339,10 +331,10 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch,                         float outputScale)  { -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return BP(D_volumeData, volumePitch, D_projData, projPitch, -		          dims, angles, TOffsets, outputScale); +		          dims, parProjs, outputScale);  	} else {  		assert(fanProjs);  		return FanBP(D_volumeData, volumePitch, D_projData, projPitch, diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index 99959c8..0b6a066 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -31,6 +31,17 @@ $Id$  #include "util.h" +namespace astra { + +class CParallelProjectionGeometry2D; +class CParallelVecProjectionGeometry2D; +class CFanFlatProjectionGeometry2D; +class CFanFlatVecProjectionGeometry2D; +class CVolumeGeometry2D; +class CProjectionGeometry2D; + +} +  namespace astraCUDA {  class _AstraExport ReconAlgo { @@ -40,11 +51,10 @@ public:  	bool setGPUIndex(int iGPUIndex); -	bool setGeometry(const SDimensions& dims, const float* angles); -	bool setFanGeometry(const SDimensions& dims, const SFanProjection* projs); +	bool setGeometry(const astra::CVolumeGeometry2D* pVolGeom, +	                 const astra::CProjectionGeometry2D* pProjGeom); -	// setTOffsets should (optionally) be called after setGeometry -	bool setTOffsets(const float* TOffsets); +	bool setSuperSampling(int raysPerDet, int raysPerPixelDim);  	void signalAbort() { shouldAbort = true; } @@ -123,9 +133,9 @@ protected:  	SDimensions dims; -	float* angles; -	float* TOffsets; +	SParProjection* parProjs;  	SFanProjection* fanProjs; +	float fOutputScale;  	volatile bool shouldAbort; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index c1e6566..b39e0a9 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -42,8 +42,10 @@ $Id$  #include <fstream>  #include <cuda.h> +#include "../../include/astra/GeometryUtil2D.h"  #include "../../include/astra/VolumeGeometry2D.h"  #include "../../include/astra/ParallelProjectionGeometry2D.h" +#include "../../include/astra/ParallelVecProjectionGeometry2D.h"  #include "../../include/astra/FanFlatProjectionGeometry2D.h"  #include "../../include/astra/FanFlatVecProjectionGeometry2D.h" @@ -64,515 +66,6 @@ enum CUDAProjectionType {  }; -class AstraFBP_internal { -public: -	SDimensions dims; -	float* angles; -	float* TOffsets; -	astraCUDA::SFanProjection* fanProjections; - -	float fOriginSourceDistance; -	float fOriginDetectorDistance; - -	float fPixelSize; - -	bool bFanBeam; -	bool bShortScan; - -	bool initialized; -	bool setStartReconstruction; - -	float* D_sinoData; -	unsigned int sinoPitch; - -	float* D_volumeData; -	unsigned int volumePitch; - -	cufftComplex * m_pDevFilter; -}; - -AstraFBP::AstraFBP() -{ -	pData = new AstraFBP_internal(); - -	pData->angles = 0; -	pData->fanProjections = 0; -	pData->TOffsets = 0; -	pData->D_sinoData = 0; -	pData->D_volumeData = 0; - -	pData->dims.iVolWidth = 0; -	pData->dims.iProjAngles = 0; -	pData->dims.fDetScale = 1.0f; -	pData->dims.iRaysPerDet = 1; -	pData->dims.iRaysPerPixelDim = 1; - -	pData->initialized = false; -	pData->setStartReconstruction = false; - -	pData->m_pDevFilter = NULL; -} - -AstraFBP::~AstraFBP() -{ -	delete[] pData->angles; -	pData->angles = 0; - -	delete[] pData->TOffsets; -	pData->TOffsets = 0; - -	delete[] pData->fanProjections; -	pData->fanProjections = 0; - -	cudaFree(pData->D_sinoData); -	pData->D_sinoData = 0; - -	cudaFree(pData->D_volumeData); -	pData->D_volumeData = 0; - -	if(pData->m_pDevFilter != NULL) -	{ -		freeComplexOnDevice(pData->m_pDevFilter); -		pData->m_pDevFilter = NULL; -	} - -	delete pData; -	pData = 0; -} - -bool AstraFBP::setReconstructionGeometry(unsigned int iVolWidth, -                                          unsigned int iVolHeight, -                                          float fPixelSize) -{ -	if (pData->initialized) -		return false; - -	pData->dims.iVolWidth = iVolWidth; -	pData->dims.iVolHeight = iVolHeight; - -	pData->fPixelSize = fPixelSize; - -	return (iVolWidth > 0 && iVolHeight > 0 && fPixelSize > 0.0f); -} - -bool AstraFBP::setProjectionGeometry(unsigned int iProjAngles, -                                      unsigned int iProjDets, -                                      const float* pfAngles, -                                      float fDetSize) -{ -	if (pData->initialized) -		return false; - -	pData->dims.iProjAngles = iProjAngles; -	pData->dims.iProjDets = iProjDets; -	pData->dims.fDetScale = fDetSize / pData->fPixelSize; - -	if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) -		return false; - -	pData->angles = new float[iProjAngles]; -	memcpy(pData->angles, pfAngles, iProjAngles * sizeof(pfAngles[0])); - -	pData->bFanBeam = false; - -	return true; -} - -bool AstraFBP::setFanGeometry(unsigned int iProjAngles, -                              unsigned int iProjDets, -                              const astraCUDA::SFanProjection *fanProjs, -                              const float* pfAngles, -                              float fOriginSourceDistance, -                              float fOriginDetectorDistance, -                              float fDetSize, -                              bool bShortScan) -{ -	// Slightly abusing setProjectionGeometry for this... -	if (!setProjectionGeometry(iProjAngles, iProjDets, pfAngles, fDetSize)) -		return false; - -	pData->fOriginSourceDistance = fOriginSourceDistance; -	pData->fOriginDetectorDistance = fOriginDetectorDistance; - -	pData->fanProjections = new astraCUDA::SFanProjection[iProjAngles]; -	memcpy(pData->fanProjections, fanProjs, iProjAngles * sizeof(fanProjs[0])); - -	pData->bFanBeam = true; -	pData->bShortScan = bShortScan; - -	return true; -} - - -bool AstraFBP::setPixelSuperSampling(unsigned int iPixelSuperSampling) -{ -	if (pData->initialized) -		return false; - -	if (iPixelSuperSampling == 0) -		return false; - -	pData->dims.iRaysPerPixelDim = iPixelSuperSampling; - -	return true; -} - - -bool AstraFBP::setTOffsets(const float* pfTOffsets) -{ -	if (pData->initialized) -		return false; - -	if (pfTOffsets == 0) -		return false; - -	pData->TOffsets = new float[pData->dims.iProjAngles]; -	memcpy(pData->TOffsets, pfTOffsets, pData->dims.iProjAngles * sizeof(pfTOffsets[0])); - -	return true; -} - -bool AstraFBP::init(int iGPUIndex) -{ -	if (pData->initialized) -	{ -		return false; -	} - -	if (pData->dims.iProjAngles == 0 || pData->dims.iVolWidth == 0) -	{ -		return false; -	} - -	if (iGPUIndex != -1) { -		cudaSetDevice(iGPUIndex); -		cudaError_t err = cudaGetLastError(); - -		// Ignore errors caused by calling cudaSetDevice multiple times -		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) -		{ -			return false; -		} -	} - -	bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); -	if (!ok) -	{ -		return false; -	} - -	ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims); -	if (!ok) -	{ -		cudaFree(pData->D_volumeData); -		pData->D_volumeData = 0; -		return false; -	} - -	pData->initialized = true; - -	return true; -} - -bool AstraFBP::setSinogram(const float* pfSinogram, -                            unsigned int iSinogramPitch) -{ -	if (!pData->initialized) -		return false; -	if (!pfSinogram) -		return false; - -	bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch, -	                               pData->dims, -	                               pData->D_sinoData, pData->sinoPitch); -	if (!ok) -		return false; - -	// rescale sinogram to adjust for pixel size -	processSino<opMul>(pData->D_sinoData, -	                       1.0f/(pData->fPixelSize*pData->fPixelSize), -	                       pData->sinoPitch, pData->dims); - -	pData->setStartReconstruction = false; - -	return true; -} - -static int calcNextPowerOfTwo(int _iValue) -{ -	int iOutput = 1; - -	while(iOutput < _iValue) -	{ -		iOutput *= 2; -	} - -	return iOutput; -} - -bool AstraFBP::run() -{ -	if (!pData->initialized) -	{ -		return false; -	} - -	zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); - -	bool ok = false; - -	if (pData->bFanBeam) { -		// Call FDK_PreWeight to handle fan beam geometry. We treat -		// this as a cone beam setup of a single slice: - -		// TODO: TOffsets affects this preweighting... - -		// We create a fake cudaPitchedPtr -		cudaPitchedPtr tmp; -		tmp.ptr = pData->D_sinoData; -		tmp.pitch = pData->sinoPitch * sizeof(float); -		tmp.xsize = pData->dims.iProjDets; -		tmp.ysize = pData->dims.iProjAngles; -		// and a fake Dimensions3D -		astraCUDA3d::SDimensions3D dims3d; -		dims3d.iVolX = pData->dims.iVolWidth; -		dims3d.iVolY = pData->dims.iVolHeight; -		dims3d.iVolZ = 1; -		dims3d.iProjAngles = pData->dims.iProjAngles; -		dims3d.iProjU = pData->dims.iProjDets; -		dims3d.iProjV = 1; -		dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1; - -		astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, -		              pData->fOriginDetectorDistance, 0.0f, 0.0f, -		              pData->dims.fDetScale, 1.0f, // TODO: Are these correct? -		              pData->bShortScan, dims3d, pData->angles); -	} - -	if (pData->m_pDevFilter) { - -		int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); -		int iFFTFourDetCount = calcFFTFourSize(iFFTRealDetCount); - -		cufftComplex * pDevComplexSinogram = NULL; - -		allocateComplexOnDevice(pData->dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); - -		runCudaFFT(pData->dims.iProjAngles, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); - -		applyFilter(pData->dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, pData->m_pDevFilter); - -		runCudaIFFT(pData->dims.iProjAngles, pDevComplexSinogram, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); - -		freeComplexOnDevice(pDevComplexSinogram); - -	} - -	float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; - -	if (pData->bFanBeam) { -		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale); - -	} else { -		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale); -	} -	if(!ok) -	{ -		return false; -	} - -	return true; -} - -bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstructionPitch) const -{ -	if (!pData->initialized) -		return false; - -	bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch, -	                               pData->dims, -	                               pData->D_volumeData, pData->volumePitch); -	if (!ok) -		return false; - -	return true; -} - -int AstraFBP::calcFourierFilterSize(int _iDetectorCount) -{ -	int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); -	int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); - -	// CHECKME: Matlab makes this at least 64. Do we also need to? -	return iFreqBinCount; -} - -bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) -{ -	if(pData->m_pDevFilter != 0) -	{ -		freeComplexOnDevice(pData->m_pDevFilter); -		pData->m_pDevFilter = 0; -	} - -	if (_eFilter == FILTER_NONE) -		return true; // leave pData->m_pDevFilter set to 0 - - -	int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); -	int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); - -	cufftComplex * pHostFilter = new cufftComplex[pData->dims.iProjAngles * iFreqBinCount]; -	memset(pHostFilter, 0, sizeof(cufftComplex) * pData->dims.iProjAngles * iFreqBinCount); - -	allocateComplexOnDevice(pData->dims.iProjAngles, iFreqBinCount, &(pData->m_pDevFilter)); - -	switch(_eFilter) -	{ -		case FILTER_NONE: -			// handled above -			break; -		case FILTER_RAMLAK: -		case FILTER_SHEPPLOGAN: -		case FILTER_COSINE: -		case FILTER_HAMMING: -		case FILTER_HANN: -		case FILTER_TUKEY: -		case FILTER_LANCZOS: -		case FILTER_TRIANGULAR: -		case FILTER_GAUSSIAN: -		case FILTER_BARTLETTHANN: -		case FILTER_BLACKMAN: -		case FILTER_NUTTALL: -		case FILTER_BLACKMANHARRIS: -		case FILTER_BLACKMANNUTTALL: -		case FILTER_FLATTOP: -		case FILTER_PARZEN: -		{ -			genFilter(_eFilter, _fD, pData->dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); -			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); - -			break; -		} -		case FILTER_PROJECTION: -		{ -			// make sure the offered filter has the correct size -			assert(_iFilterWidth == iFreqBinCount); - -			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) -			{ -				float fValue = _pfHostFilter[iFreqBinIndex]; - -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; -				} -			} -			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); -			break; -		} -		case FILTER_SINOGRAM: -		{ -			// make sure the offered filter has the correct size -			assert(_iFilterWidth == iFreqBinCount); - -			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) -			{ -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; - -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; -					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; -				} -			} -			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); -			break; -		} -		case FILTER_RPROJECTION: -		{ -			int iProjectionCount = pData->dims.iProjAngles; -			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; -			float * pfHostRealFilter = new float[iRealFilterElementCount]; -			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); - -			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); -			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; -			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; - -			int iFilterShiftSize = _iFilterWidth / 2; - -			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) -			{ -				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; -				float fValue = _pfHostFilter[iDetectorIndex]; - -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; -				} -			} - -			float* pfDevRealFilter = NULL; -			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors -			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); -			delete[] pfHostRealFilter; - -			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); - -			cudaFree(pfDevRealFilter); - -			break; -		} -		case FILTER_RSINOGRAM: -		{ -			int iProjectionCount = pData->dims.iProjAngles; -			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; -			float* pfHostRealFilter = new float[iRealFilterElementCount]; -			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); - -			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); -			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; -			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; - -			int iFilterShiftSize = _iFilterWidth / 2; - -			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) -			{ -				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; - -				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) -				{ -					float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; -					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; -				} -			} - -			float* pfDevRealFilter = NULL; -			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors -			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); -			delete[] pfHostRealFilter; - -			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); - -			cudaFree(pfDevRealFilter); - -			break; -		} -		default: -		{ -			ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested"); -			delete [] pHostFilter; -			return false; -		} -	} - -	delete [] pHostFilter; - -	return true; -} -  BPalgo::BPalgo()  { @@ -617,18 +110,17 @@ float BPalgo::computeDiffNorm()  bool astraCudaFP(const float* pfVolume, float* pfSinogram,                   unsigned int iVolWidth, unsigned int iVolHeight,                   unsigned int iProjAngles, unsigned int iProjDets, -                 const float *pfAngles, const float *pfOffsets, -                 float fDetSize, unsigned int iDetSuperSampling, +                 const SParProjection *pAngles, +                 unsigned int iDetSuperSampling,                   float fOutputScale, int iGPUIndex)  {  	SDimensions dims; -	if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) +	if (iProjAngles == 0 || iProjDets == 0 || pAngles == 0)  		return false;  	dims.iProjAngles = iProjAngles;  	dims.iProjDets = iProjDets; -	dims.fDetScale = fDetSize;  	if (iDetSuperSampling == 0)  		return false; @@ -678,7 +170,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,  	}  	zeroProjectionData(D_sinoData, sinoPitch, dims); -	ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, fOutputScale); +	ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, fOutputScale);  	if (!ok) {  		cudaFree(D_volumeData);  		cudaFree(D_sinoData); @@ -713,7 +205,6 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,  	dims.iProjAngles = iProjAngles;  	dims.iProjDets = iProjDets; -	dims.fDetScale = 1.0f; // TODO?  	if (iDetSuperSampling == 0)  		return false; @@ -789,118 +280,99 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,  } -bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, -                          const CParallelProjectionGeometry2D* pProjGeom, -                          float*& detectorOffsets, float*& projectionAngles, -                          float& detSize, float& outputScale) +// adjust pProjs to normalize volume geometry +template<typename ProjectionT> +static bool convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom, +                          unsigned int iProjectionAngleCount, +                          ProjectionT*& pProjs, +                          float& fOutputScale)  { -	assert(pVolGeom); -	assert(pProjGeom); -	assert(pProjGeom->getProjectionAngles()); - +	// TODO: Make EPS relative  	const float EPS = 0.00001f; -	int nth = pProjGeom->getProjectionAngleCount(); -  	// Check if pixels are square  	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)  		return false; +	float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; +	float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; -	// Scale volume pixels to 1x1 -	detSize = pProjGeom->getDetectorWidth() / pVolGeom->getPixelLengthX(); +	float factor = 1.0f / pVolGeom->getPixelLengthX(); -	// Copy angles -	float *angles = new float[nth]; -	for (int i = 0; i < nth; ++i) -		angles[i] = pProjGeom->getProjectionAngles()[i]; -	projectionAngles = angles; - -	// Check if we need to translate -	bool offCenter = false; -	if (abs(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) > EPS || -	    abs(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) > EPS) -	{ -		offCenter = true; +	for (int i = 0; i < iProjectionAngleCount; ++i) { +		// CHECKME: Order of scaling and translation +		pProjs[i].translate(dx, dy); +		pProjs[i].scale(factor);  	} +	// CHECKME: Check factor +	fOutputScale *= pVolGeom->getPixelLengthX() * pVolGeom->getPixelLengthY(); -	// If there are existing detector offsets, or if we need to translate, -	// we need to return offsets -	if (offCenter) -	{ -		float* offset = new float[nth]; +	return true; +} -		for (int i = 0; i < nth; ++i) -			offset[i] = 0.0f; -		if (offCenter) { -			float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; -			float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; -			// CHECKME: Is d in pixels or in units? +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CParallelProjectionGeometry2D* pProjGeom, +                          SParProjection*& pProjs, +                          float& fOutputScale) +{ +	assert(pVolGeom); +	assert(pProjGeom); +	assert(pProjGeom->getProjectionAngles()); -			for (int i = 0; i < nth; ++i) { -				float d = dx * cos(angles[i]) + dy * sin(angles[i]); -				offset[i] += d; -			} -		} +	int nth = pProjGeom->getProjectionAngleCount(); -		// CHECKME: Order of scaling and translation +	pProjs = genParProjections(nth, +	                           pProjGeom->getDetectorCount(), +	                           pProjGeom->getDetectorWidth(), +	                           pProjGeom->getProjectionAngles(), 0); -		// Scale volume pixels to 1x1 -		for (int i = 0; i < nth; ++i) { -			//offset[i] /= pVolGeom->getPixelLengthX(); -			//offset[i] *= detSize; -		} +	bool ok; +	fOutputScale = 1.0f; +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); -		detectorOffsets = offset; -	} else { -		detectorOffsets = 0; +	if (!ok) { +		delete[] pProjs; +		pProjs = 0;  	} -	outputScale = pVolGeom->getPixelLengthX(); -	outputScale *= outputScale; - -	return true; +	return ok;  } -static void convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom, -                          unsigned int iProjectionAngleCount, -                          astraCUDA::SFanProjection*& pProjs, -                          float& outputScale) +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CParallelVecProjectionGeometry2D* pProjGeom, +                          SParProjection*& pProjs, +                          float& fOutputScale)  { -	// Translate -	float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; -	float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; +	assert(pVolGeom); +	assert(pProjGeom); +	assert(pProjGeom->getProjectionVectors()); -	for (int i = 0; i < iProjectionAngleCount; ++i) { -		pProjs[i].fSrcX -= dx; -		pProjs[i].fSrcY -= dy; -		pProjs[i].fDetSX -= dx; -		pProjs[i].fDetSY -= dy; +	int nth = pProjGeom->getProjectionAngleCount(); + +	pProjs = new SParProjection[nth]; + +	for (int i = 0; i < nth; ++i) { +		pProjs[i] = pProjGeom->getProjectionVectors()[i];  	} -	// CHECKME: Order of scaling and translation +	bool ok; +	fOutputScale = 1.0f; -	// Scale -	float factor = 1.0f / pVolGeom->getPixelLengthX(); -	for (int i = 0; i < iProjectionAngleCount; ++i) { -		pProjs[i].fSrcX *= factor; -		pProjs[i].fSrcY *= factor; -		pProjs[i].fDetSX *= factor; -		pProjs[i].fDetSY *= factor; -		pProjs[i].fDetUX *= factor; -		pProjs[i].fDetUY *= factor; +	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); +	if (!ok) { +		delete[] pProjs; +		pProjs = 0;  	} -	// CHECKME: Check factor -	outputScale = pVolGeom->getPixelLengthX(); -//	outputScale *= outputScale; +	return ok;  } +  bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                            const CFanFlatProjectionGeometry2D* pProjGeom,                            astraCUDA::SFanProjection*& pProjs, @@ -910,6 +382,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	assert(pProjGeom);  	assert(pProjGeom->getProjectionAngles()); +	// TODO: Make EPS relative  	const float EPS = 0.00001f;  	int nth = pProjGeom->getProjectionAngleCount(); @@ -928,23 +401,9 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	float fDetSize = pProjGeom->getDetectorWidth();  	const float *pfAngles = pProjGeom->getProjectionAngles(); -	pProjs = new SFanProjection[nth]; - -	float fSrcX0 = 0.0f; -	float fSrcY0 = -fOriginSourceDistance; -	float fDetUX0 = fDetSize; -	float fDetUY0 = 0.0f; -	float fDetSX0 = pProjGeom->getDetectorCount() * fDetUX0 / -2.0f; -	float fDetSY0 = fOriginDetectorDistance; - -#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0) -	for (int i = 0; i < nth; ++i) { -		ROTATE0(Src, i, pfAngles[i]); -		ROTATE0(DetS, i, pfAngles[i]); -		ROTATE0(DetU, i, pfAngles[i]); -	} - -#undef ROTATE0 +	pProjs = genFanProjections(nth, pProjGeom->getDetectorCount(), +                               fOriginSourceDistance, fOriginDetectorDistance, +	                           fDetSize, pfAngles);  	convertAstraGeometry_internal(pVolGeom, nth, pProjs, outputScale); @@ -961,6 +420,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  	assert(pProjGeom);  	assert(pProjGeom->getProjectionVectors()); +	// TODO: Make EPS relative  	const float EPS = 0.00001f;  	int nx = pVolGeom->getGridColCount(); @@ -983,6 +443,52 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,  } +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CProjectionGeometry2D* pProjGeom, +                          astraCUDA::SParProjection*& pParProjs, +                          astraCUDA::SFanProjection*& pFanProjs, +                          float& outputScale) +{ +	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<const CParallelProjectionGeometry2D*>(pProjGeom); +	const CParallelVecProjectionGeometry2D* parVecProjGeom = dynamic_cast<const CParallelVecProjectionGeometry2D*>(pProjGeom); +	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<const CFanFlatProjectionGeometry2D*>(pProjGeom); +	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<const CFanFlatVecProjectionGeometry2D*>(pProjGeom); + +	bool ok = false; + +	if (parProjGeom) { +		ok = convertAstraGeometry(pVolGeom, parProjGeom, pParProjs, outputScale); +	} else if (parVecProjGeom) { +		ok = convertAstraGeometry(pVolGeom, parVecProjGeom, pParProjs, outputScale); +	} else if (fanProjGeom) { +		ok = convertAstraGeometry(pVolGeom, fanProjGeom, pFanProjs, outputScale); +	} else if (fanVecProjGeom) { +		ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, pFanProjs, outputScale); +	} else { +		ok = false; +	} + +	return ok; +} + +bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom, +                               const CProjectionGeometry2D* pProjGeom, +                               SDimensions& dims) +{ +	dims.iVolWidth = pVolGeom->getGridColCount(); +	dims.iVolHeight = pVolGeom->getGridRowCount(); + +	dims.iProjAngles = pProjGeom->getProjectionAngleCount(); +	dims.iProjDets = pProjGeom->getDetectorCount(); + +	dims.iRaysPerDet = 1; +	dims.iRaysPerPixelDim = 1; + +	return true; +} + + +  } diff --git a/cuda/2d/astra.h b/cuda/2d/astra.h index 617883b..8e91feb 100644 --- a/cuda/2d/astra.h +++ b/cuda/2d/astra.h @@ -29,7 +29,6 @@ $Id$  #ifndef _CUDA_ASTRA_H  #define _CUDA_ASTRA_H -#include "fft.h"  #include "fbp_filters.h"  #include "dims.h"  #include "algo.h" @@ -43,140 +42,12 @@ enum Cuda2DProjectionKernel {  };  class CParallelProjectionGeometry2D; +class CParallelVecProjectionGeometry2D;  class CFanFlatProjectionGeometry2D;  class CFanFlatVecProjectionGeometry2D;  class CVolumeGeometry2D; +class CProjectionGeometry2D; -class AstraFBP_internal; - -class _AstraExport AstraFBP { -public: -	// Constructor -	AstraFBP(); - -	// Destructor -	~AstraFBP(); - -	// Set the size of the reconstruction rectangle. -	// Volume pixels are currently assumed to be 1x1 squares. -	bool setReconstructionGeometry(unsigned int iVolWidth, -	                               unsigned int iVolHeight, -	                               float fPixelSize = 1.0f); - -	// Set the projection angles and number of detector pixels per angle. -	// pfAngles must be a float array of length iProjAngles. -	// fDetSize indicates the size of a detector pixel compared to a -	// volume pixel edge. -	// -	// pfAngles will only be read from during this call. -	bool setProjectionGeometry(unsigned int iProjAngles, -	                           unsigned int iProjDets, -	                           const float *pfAngles, -	                           float fDetSize = 1.0f); -	// Set the projection angles and number of detector pixels per angle. -	// pfAngles must be a float array of length iProjAngles. -	// fDetSize indicates the size of a detector pixel compared to a -	// volume pixel edge. -	// -	// pfAngles, fanProjs will only be read from during this call. -	bool setFanGeometry(unsigned int iProjAngles, -	                    unsigned int iProjDets, -	                    const astraCUDA::SFanProjection *fanProjs, -	                    const float *pfAngles, -	                    float fOriginSourceDistance, -	                    float fOriginDetectorDistance, -	                    float fDetSize = 1.0f, -	                    bool bShortScan = false); - -	// Set linear supersampling factor for the BP. -	// (The number of rays is the square of this) -	// -	// This may optionally be called before init(). -	bool setPixelSuperSampling(unsigned int iPixelSuperSampling); - -	// Set per-detector shifts. -	// -	// pfTOffsets will only be read from during this call. -	bool setTOffsets(const float *pfTOffsets); - -	// Returns the required size of a filter in the fourier domain -	// when multiplying it with the fft of the projection data. -	// Its value is equal to the smallest power of two larger than -	// or equal to twice the number of detectors in the spatial domain. -	// -	// _iDetectorCount is the number of detectors in the spatial domain. -	static int calcFourierFilterSize(int _iDetectorCount); - -	// Sets the filter type. Some filter types require the user to supply an -	// array containing the filter. -	// The number of elements in a filter in the fourier domain should be equal -	// to the value returned by calcFourierFilterSize(). -	// The following types require a filter: -	// -	// - FILTER_PROJECTION: -	// The filter size should be equal to the output of -	// calcFourierFilterSize(). The filtered sinogram is -	// multiplied with the supplied filter. -	// -	// - FILTER_SINOGRAM: -	// Same as FILTER_PROJECTION, but now the filter should contain a row for -	// every projection direction. -	// -	// - FILTER_RPROJECTION: -	// The filter should now contain one kernel (= ifft of filter), with the -	// peak in the center. The filter width -	// can be any value. If odd, the peak is assumed to be in the center, if -	// even, it is assumed to be at floor(filter-width/2). -	// -	// - FILTER_RSINOGRAM -	// Same as FILTER_RPROJECTION, but now the supplied filter should contain a -	// row for every projection direction. -	// -	// A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN, -	// FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN) -	// have a D variable, which gives the cutoff point in the frequency domain. -	// Setting this value to 1.0 will include the whole filter -	bool setFilter(E_FBPFILTER _eFilter, -                   const float * _pfHostFilter = NULL, -                   int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f); - -	// Initialize CUDA, allocate GPU buffers and -	// precompute geometry-specific data. -	// -	// CUDA is set up to use GPU number iGPUIndex. -	// -	// This must be called after calling setReconstructionGeometry() and -	// setProjectionGeometry(). -	bool init(int iGPUIndex = 0); - -	// Setup input sinogram for a slice. -	// pfSinogram must be a float array of size iProjAngles*iSinogramPitch. -	// NB: iSinogramPitch is measured in floats, not in bytes. -	// -	// This must be called after init(), and before iterate(). It may be -	// called again after iterate()/getReconstruction() to start a new slice. -	// -	// pfSinogram will only be read from during this call. -	bool setSinogram(const float* pfSinogram, unsigned int iSinogramPitch); - -	// Runs an FBP reconstruction. -	// This must be called after setSinogram(). -	// -	// run can be called before setFilter, but will then use the default Ram-Lak filter -	bool run(); - -	// Get the reconstructed slice. -	// pfReconstruction must be a float array of size -	// iVolHeight*iReconstructionPitch. -	// NB: iReconstructionPitch is measured in floats, not in bytes. -	// -	// This may be called after run(). -	bool getReconstruction(float* pfReconstruction, -	                       unsigned int iReconstructionPitch) const; - -private: -	AstraFBP_internal* pData; -};  class _AstraExport BPalgo : public astraCUDA::ReconAlgo {  public: @@ -199,8 +70,8 @@ public:  _AstraExport bool astraCudaFP(const float* pfVolume, float* pfSinogram,                   unsigned int iVolWidth, unsigned int iVolHeight,                   unsigned int iProjAngles, unsigned int iProjDets, -                 const float *pfAngles, const float *pfOffsets, -                 float fDetSize = 1.0f, unsigned int iDetSuperSampling = 1, +                 const SParProjection *pAngles, +                 unsigned int iDetSuperSampling = 1,                   float fOutputScale = 1.0f, int iGPUIndex = 0);  _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, @@ -213,8 +84,14 @@ _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,  _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                      const CParallelProjectionGeometry2D* pProjGeom, -                    float*& pfDetectorOffsets, float*& pfProjectionAngles, -                    float& fDetSize, float& fOutputScale); +                    astraCUDA::SParProjection*& pProjs, +                    float& fOutputScale); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                    const CParallelVecProjectionGeometry2D* pProjGeom, +                    astraCUDA::SParProjection*& pProjs, +                    float& fOutputScale); +  _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                      const CFanFlatProjectionGeometry2D* pProjGeom, @@ -226,6 +103,15 @@ _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,                      astraCUDA::SFanProjection*& pProjs,                      float& outputScale); +_AstraExport bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom, +                          const CProjectionGeometry2D* pProjGeom, +                          astraCUDA::SDimensions& dims); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, +                          const CProjectionGeometry2D* pProjGeom, +                          astraCUDA::SParProjection*& pParProjs, +                          astraCUDA::SFanProjection*& pFanProjs, +                          float& outputScale);  }  #endif diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index f402914..3f06581 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -207,42 +207,6 @@ float CGLS::computeDiffNorm()  	return sqrt(s);  } -bool doCGLS(float* D_volumeData, unsigned int volumePitch, -            float* D_sinoData, unsigned int sinoPitch, -            const SDimensions& dims, /*const SAugmentedData& augs,*/ -            const float* angles, const float* TOffsets, unsigned int iterations) -{ -	CGLS cgls; -	bool ok = true; - -	ok &= cgls.setGeometry(dims, angles); -#if 0 -	if (D_maskData) -		ok &= cgls.enableVolumeMask(); -#endif -	if (TOffsets) -		ok &= cgls.setTOffsets(TOffsets); - -	if (!ok) -		return false; - -	ok = cgls.init(); -	if (!ok) -		return false; - -#if 0 -	if (D_maskData) -		ok &= cgls.setVolumeMask(D_maskData, maskPitch); -#endif - -	ok &= cgls.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); -	if (!ok) -		return false; - -	ok = cgls.iterate(iterations); - -	return ok; -}  } diff --git a/cuda/2d/dims.h b/cuda/2d/dims.h index e870da5..f74d0e4 100644 --- a/cuda/2d/dims.h +++ b/cuda/2d/dims.h @@ -34,9 +34,11 @@ $Id$  namespace astraCUDA { +using astra::SParProjection;  using astra::SFanProjection; +  struct SDimensions {  	// Width, height of reconstruction volume  	unsigned int iVolWidth; @@ -48,9 +50,6 @@ struct SDimensions {  	// Number of detector pixels  	unsigned int iProjDets; -	// size of detector compared to volume pixels -	float fDetScale; -  	// in FP, number of rays to trace per detector pixel.  	// This should usually be set to 1.  	// If fDetScale > 1, this should be set to an integer of roughly diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 8593b08..47ec548 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -170,34 +170,6 @@ float EM::computeDiffNorm()  } -bool doEM(float* D_volumeData, unsigned int volumePitch, -          float* D_sinoData, unsigned int sinoPitch, -          const SDimensions& dims, const float* angles, -          const float* TOffsets, unsigned int iterations) -{ -	EM em; -	bool ok = true; - -	ok &= em.setGeometry(dims, angles); -	if (TOffsets) -		ok &= em.setTOffsets(TOffsets); - -	if (!ok) -		return false; - -	ok = em.init(); -	if (!ok) -		return false; - -	ok &= em.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); -	if (!ok) -		return false; - -	ok = em.iterate(iterations); - -	return ok; -} -  }  #ifdef STANDALONE diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu new file mode 100644 index 0000000..2feac7d --- /dev/null +++ b/cuda/2d/fbp.cu @@ -0,0 +1,347 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +           2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "fbp.h" +#include "fft.h" +#include "par_bp.h" +#include "fan_bp.h" + +// For fan-beam preweighting +#include "../3d/fdk.h" + +#include "astra/Logging.h" + +#include <cuda.h> + +namespace astraCUDA { + + + +static int calcNextPowerOfTwo(int n) +{ +	int x = 1; +	while (x < n) +		x *= 2; + +	return x; +} + +// static +int FBP::calcFourierFilterSize(int _iDetectorCount) +{ +	int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); +	int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount); + +	// CHECKME: Matlab makes this at least 64. Do we also need to? +	return iFreqBinCount; +} + + + + +FBP::FBP() : ReconAlgo() +{ +	D_filter = 0; + +} + +FBP::~FBP() +{ +	reset(); +} + +void FBP::reset() +{ +	if (D_filter) { +		freeComplexOnDevice((cufftComplex *)D_filter); +		D_filter = 0; +	} +} + +bool FBP::init() +{ +	return true; +} + +bool FBP::setFilter(astra::E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) +{ +	if (D_filter) +	{ +		freeComplexOnDevice((cufftComplex*)D_filter); +		D_filter = 0; +	} + +	if (_eFilter == astra::FILTER_NONE) +		return true; // leave D_filter set to 0 + + +	int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets); +	int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount); + +	cufftComplex * pHostFilter = new cufftComplex[dims.iProjAngles * iFreqBinCount]; +	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iFreqBinCount); + +	allocateComplexOnDevice(dims.iProjAngles, iFreqBinCount, (cufftComplex**)&D_filter); + +	switch(_eFilter) +	{ +		case astra::FILTER_NONE: +			// handled above +			break; +		case astra::FILTER_RAMLAK: +		case astra::FILTER_SHEPPLOGAN: +		case astra::FILTER_COSINE: +		case astra::FILTER_HAMMING: +		case astra::FILTER_HANN: +		case astra::FILTER_TUKEY: +		case astra::FILTER_LANCZOS: +		case astra::FILTER_TRIANGULAR: +		case astra::FILTER_GAUSSIAN: +		case astra::FILTER_BARTLETTHANN: +		case astra::FILTER_BLACKMAN: +		case astra::FILTER_NUTTALL: +		case astra::FILTER_BLACKMANHARRIS: +		case astra::FILTER_BLACKMANNUTTALL: +		case astra::FILTER_FLATTOP: +		case astra::FILTER_PARZEN: +		{ +			genFilter(_eFilter, _fD, dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); +			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); + +			break; +		} +		case astra::FILTER_PROJECTION: +		{ +			// make sure the offered filter has the correct size +			assert(_iFilterWidth == iFreqBinCount); + +			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) +			{ +				float fValue = _pfHostFilter[iFreqBinIndex]; + +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; +				} +			} +			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); +			break; +		} +		case astra::FILTER_SINOGRAM: +		{ +			// make sure the offered filter has the correct size +			assert(_iFilterWidth == iFreqBinCount); + +			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) +			{ +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; + +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; +					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; +				} +			} +			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); +			break; +		} +		case astra::FILTER_RPROJECTION: +		{ +			int iProjectionCount = dims.iProjAngles; +			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; +			float * pfHostRealFilter = new float[iRealFilterElementCount]; +			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + +			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); +			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; +			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + +			int iFilterShiftSize = _iFilterWidth / 2; + +			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) +			{ +				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; +				float fValue = _pfHostFilter[iDetectorIndex]; + +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; +				} +			} + +			float* pfDevRealFilter = NULL; +			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors +			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); +			delete[] pfHostRealFilter; + +			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter); + +			cudaFree(pfDevRealFilter); + +			break; +		} +		case astra::FILTER_RSINOGRAM: +		{ +			int iProjectionCount = dims.iProjAngles; +			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; +			float* pfHostRealFilter = new float[iRealFilterElementCount]; +			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + +			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); +			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; +			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + +			int iFilterShiftSize = _iFilterWidth / 2; + +			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) +			{ +				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; + +				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) +				{ +					float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; +					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; +				} +			} + +			float* pfDevRealFilter = NULL; +			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors +			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); +			delete[] pfHostRealFilter; + +			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter); + +			cudaFree(pfDevRealFilter); + +			break; +		} +		default: +		{ +			ASTRA_ERROR("FBP::setFilter: Unknown filter type requested"); +			delete [] pHostFilter; +			return false; +		} +	} + +	delete [] pHostFilter; + +	return true; +} + +bool FBP::iterate(unsigned int iterations) +{ +	zeroVolumeData(D_volumeData, volumePitch, dims); + +	bool ok = false; + +	if (fanProjs) { +		// Call FDK_PreWeight to handle fan beam geometry. We treat +		// this as a cone beam setup of a single slice: + +		// TODO: TOffsets affects this preweighting... + +		// TODO: We take the fan parameters from the last projection here +		// without checking if they're the same in all projections + +		float *pfAngles = new float[dims.iProjAngles]; + +		float fOriginSource, fOriginDetector, fDetSize, fOffset; +		for (unsigned int i = 0; i < dims.iProjAngles; ++i) { +			bool ok = astra::getFanParameters(fanProjs[i], dims.iProjDets, +			                                  pfAngles[i], +			                                  fOriginSource, fOriginDetector, +			                                  fDetSize, fOffset); +			if (!ok) { +				ASTRA_ERROR("FBP_CUDA: Failed to extract circular fan beam parameters from fan beam geometry"); +				return false; +			} +		} + +		// We create a fake cudaPitchedPtr +		cudaPitchedPtr tmp; +		tmp.ptr = D_sinoData; +		tmp.pitch = sinoPitch * sizeof(float); +		tmp.xsize = dims.iProjDets; +		tmp.ysize = dims.iProjAngles; +		// and a fake Dimensions3D +		astraCUDA3d::SDimensions3D dims3d; +		dims3d.iVolX = dims.iVolWidth; +		dims3d.iVolY = dims.iVolHeight; +		dims3d.iVolZ = 1; +		dims3d.iProjAngles = dims.iProjAngles; +		dims3d.iProjU = dims.iProjDets; +		dims3d.iProjV = 1; +		dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1; + +		astraCUDA3d::FDK_PreWeight(tmp, fOriginSource, +		              fOriginDetector, 0.0f, 0.0f, +		              fDetSize, 1.0f, +		              m_bShortScan, dims3d, pfAngles); +	} else { +		// TODO: How should different detector pixel size in different +		// projections be handled? +	} + +	if (D_filter) { + +		int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets); +		int iFFTFourDetCount = calcFFTFourierSize(iFFTRealDetCount); + +		cufftComplex * pDevComplexSinogram = NULL; + +		allocateComplexOnDevice(dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); + +		runCudaFFT(dims.iProjAngles, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); + +		applyFilter(dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, (cufftComplex*)D_filter); + +		runCudaIFFT(dims.iProjAngles, pDevComplexSinogram, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); + +		freeComplexOnDevice(pDevComplexSinogram); + +	} + +	float fOutputScale = (M_PI / 2.0f) / (float)dims.iProjAngles; + +	if (fanProjs) { +		ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fOutputScale); + +	} else { +		ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale); +	} +	if(!ok) +	{ +		return false; +	} + +	return true; +} + + +} diff --git a/cuda/2d/fbp.h b/cuda/2d/fbp.h new file mode 100644 index 0000000..8b4d64d --- /dev/null +++ b/cuda/2d/fbp.h @@ -0,0 +1,98 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +           2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "algo.h" +#include "fbp_filters.h" + +namespace astraCUDA { + +class _AstraExport FBP : public ReconAlgo { +public: +	FBP(); +	~FBP(); + +	virtual bool useSinogramMask() { return false; } +	virtual bool useVolumeMask() { return false; } + +	// Returns the required size of a filter in the fourier domain +	// when multiplying it with the fft of the projection data. +	// Its value is equal to the smallest power of two larger than +	// or equal to twice the number of detectors in the spatial domain. +	// +	// _iDetectorCount is the number of detectors in the spatial domain. +	static int calcFourierFilterSize(int _iDetectorCount); + +	// Sets the filter type. Some filter types require the user to supply an +	// array containing the filter. +	// The number of elements in a filter in the fourier domain should be equal +	// to the value returned by calcFourierFilterSize(). +	// The following types require a filter: +	// +	// - FILTER_PROJECTION: +	// The filter size should be equal to the output of +	// calcFourierFilterSize(). The filtered sinogram is +	// multiplied with the supplied filter. +	// +	// - FILTER_SINOGRAM: +	// Same as FILTER_PROJECTION, but now the filter should contain a row for +	// every projection direction. +	// +	// - FILTER_RPROJECTION: +	// The filter should now contain one kernel (= ifft of filter), with the +	// peak in the center. The filter width +	// can be any value. If odd, the peak is assumed to be in the center, if +	// even, it is assumed to be at floor(filter-width/2). +	// +	// - FILTER_RSINOGRAM +	// Same as FILTER_RPROJECTION, but now the supplied filter should contain a +	// row for every projection direction. +	// +	// A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN, +	// FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN) +	// have a D variable, which gives the cutoff point in the frequency domain. +	// Setting this value to 1.0 will include the whole filter +	bool setFilter(astra::E_FBPFILTER _eFilter, +                   const float * _pfHostFilter = NULL, +                   int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f); + +	bool setShortScan(bool ss) { m_bShortScan = ss; return true; } + +	virtual bool init(); + +	virtual bool iterate(unsigned int iterations); + +	virtual float computeDiffNorm() { return 0.0f; } // TODO + +protected: +	void reset(); + +	void* D_filter; // cufftComplex* +	bool m_bShortScan; +}; + +} diff --git a/cuda/2d/fbp_filters.h b/cuda/2d/fbp_filters.h index b206a5d..71c7572 100644 --- a/cuda/2d/fbp_filters.h +++ b/cuda/2d/fbp_filters.h @@ -29,6 +29,8 @@ $Id$  #ifndef FBP_FILTERS_H  #define FBP_FILTERS_H +namespace astra { +  enum E_FBPFILTER  {  	FILTER_NONE,			//< no filter (regular BP) @@ -55,4 +57,6 @@ enum E_FBPFILTER  	FILTER_RSINOGRAM,		//< sinogram filter in real space  }; +} +  #endif /* FBP_FILTERS_H */ diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2d259a9..8a7cc10 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -64,6 +64,8 @@ using namespace astra;    } } while (0) +namespace astraCUDA { +  __global__ static void applyFilter_kernel(int _iProjectionCount,                                            int _iFreqBinCount,                                            cufftComplex * _pSinogram, @@ -276,11 +278,11 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  // Because the input is real, the Fourier transform is symmetric.  // CUFFT only outputs the first half (ignoring the redundant second half),  // and expects the same as input for the IFFT. -int calcFFTFourSize(int _iFFTRealSize) +int calcFFTFourierSize(int _iFFTRealSize)  { -	int iFFTFourSize = _iFFTRealSize / 2 + 1; +	int iFFTFourierSize = _iFFTRealSize / 2 + 1; -	return iFFTFourSize; +	return iFFTFourierSize;  }  void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter, @@ -695,6 +697,10 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,  	delete[] pfW;  } + +} + +  #ifdef STANDALONE  __global__ static void doubleFourierOutput_kernel(int _iProjectionCount, diff --git a/cuda/2d/fft.h b/cuda/2d/fft.h index e985fc6..b29f17a 100644 --- a/cuda/2d/fft.h +++ b/cuda/2d/fft.h @@ -34,6 +34,8 @@ $Id$  #include "fbp_filters.h" +namespace astraCUDA { +  bool allocateComplexOnDevice(int _iProjectionCount,                               int _iDetectorCount,                               cufftComplex ** _ppDevComplex); @@ -57,13 +59,15 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,  void applyFilter(int _iProjectionCount, int _iFreqBinCount,                   cufftComplex * _pSinogram, cufftComplex * _pFilter); -int calcFFTFourSize(int _iFFTRealSize); +int calcFFTFourierSize(int _iFFTRealSize); -void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, +void genFilter(astra::E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,                 cufftComplex * _pFilter, int _iFFTRealDetectorCount,                 int _iFFTFourierDetectorCount, float _fParameter = -1.0f);  void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,                     int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount); +} +  #endif /* FFT_H */ diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index d9f7325..cf0a684 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -53,8 +53,8 @@ const unsigned int g_blockSlices = 16;  const unsigned int g_MaxAngles = 2560; -__constant__ float gC_angle_sin[g_MaxAngles]; -__constant__ float gC_angle_cos[g_MaxAngles]; +__constant__ float gC_angle_scaled_sin[g_MaxAngles]; +__constant__ float gC_angle_scaled_cos[g_MaxAngles];  __constant__ float gC_angle_offset[g_MaxAngles];  static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder) @@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi  	return true;  } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -87,47 +87,30 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star  	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)  		return; -	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale; -	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale; +	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ); +	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f );  	float* volData = (float*)D_volData;  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; -	if (offsets) { - -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; -			const float TOffset = gC_angle_offset[angle]; - -			const float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset; -			fVal += tex2D(gT_projTexture, fT, fA); -			fA += 1.0f; -		} - -	} else { - -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; - -			const float fT = fT_base + fX * cos_theta - fY * sin_theta; -			fVal += tex2D(gT_projTexture, fT, fA); -			fA += 1.0f; -		} +	for (int angle = startAngle; angle < endAngle; ++angle) +	{ +		const float scaled_cos_theta = gC_angle_scaled_cos[angle]; +		const float scaled_sin_theta = gC_angle_scaled_sin[angle]; +		const float TOffset = gC_angle_offset[angle]; +		const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset; +		fVal += tex2D(gT_projTexture, fT, fA); +		fA += 1.0f;  	}  	volData[Y*volPitch+X] += fVal * fOutputScale;  }  // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)  {  	const int relX = threadIdx.x;  	const int relY = threadIdx.y; @@ -141,61 +124,35 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s  	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)  		return; -	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale; -	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale; +	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim); +	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim); -	const float fSubStep = 1.0f/(dims.iRaysPerPixelDim * dims.fDetScale); +	const float fSubStep = 1.0f/(dims.iRaysPerPixelDim); // * dims.fDetScale);  	float* volData = (float*)D_volData;  	float fVal = 0.0f;  	float fA = startAngle + 0.5f; -	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;  	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); -	if (offsets) { - -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; -			const float TOffset = gC_angle_offset[angle]; - -			float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset; - -			for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { -				float fTy = fT; -				fT += fSubStep * cos_theta; -				for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { -					fVal += tex2D(gT_projTexture, fTy, fA); -					fTy -= fSubStep * sin_theta; -				} -			} -			fA += 1.0f; -		} - -	} else { +	for (int angle = startAngle; angle < endAngle; ++angle) +	{ +		const float cos_theta = gC_angle_scaled_cos[angle]; +		const float sin_theta = gC_angle_scaled_sin[angle]; +		const float TOffset = gC_angle_offset[angle]; -		for (int angle = startAngle; angle < endAngle; ++angle) -		{ -			const float cos_theta = gC_angle_cos[angle]; -			const float sin_theta = gC_angle_sin[angle]; +		float fT = fX * cos_theta - fY * sin_theta + TOffset; -			float fT = fT_base + fX * cos_theta - fY * sin_theta; - -			for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { -				float fTy = fT; -				fT += fSubStep * cos_theta; -				for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { -					fVal += tex2D(gT_projTexture, fTy, fA); -					fTy -= fSubStep * sin_theta; -				} +		for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { +			float fTy = fT; +			fT += fSubStep * cos_theta; +			for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { +				fVal += tex2D(gT_projTexture, fTy, fA); +				fTy -= fSubStep * sin_theta;  			} -			fA += 1.0f; -  		} - +		fA += 1.0f;  	}  	volData[Y*volPitch+X] += fVal * fOutputScale; @@ -212,12 +169,10 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset  	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)  		return; -	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale; -	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale; - -	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; +	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ); +	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ); -	const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset; +	const float fT = fX * angle_cos - fY * angle_sin + offset;  	const float fVal = tex2D(gT_projTexture, fT, 0.5f);  	D_volData[Y*volPitch+X] += fVal * fOutputScale; @@ -226,32 +181,35 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset  bool BP_internal(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) +        const SDimensions& dims, const SParProjection* angles, +        float fOutputScale)  { -	// TODO: process angles block by block  	assert(dims.iProjAngles <= g_MaxAngles); -	float* angle_sin = new float[dims.iProjAngles]; -	float* angle_cos = new float[dims.iProjAngles]; +	float* angle_scaled_sin = new float[dims.iProjAngles]; +	float* angle_scaled_cos = new float[dims.iProjAngles]; +	float* angle_offset = new float[dims.iProjAngles];  	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);  	for (unsigned int i = 0; i < dims.iProjAngles; ++i) { -		angle_sin[i] = sinf(angles[i]); -		angle_cos[i] = cosf(angles[i]); +		double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX; +		angle_scaled_cos[i] = angles[i].fRayY / d; +		angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs +		angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;  	} -	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + +	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); +	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); +	cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);  	assert(e1 == cudaSuccess);  	assert(e2 == cudaSuccess); +	assert(e3 == cudaSuccess); -	if (TOffsets) { -		cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -		assert(e3 == cudaSuccess); -	} -	delete[] angle_sin; -	delete[] angle_cos; +	delete[] angle_scaled_sin; +	delete[] angle_scaled_cos; +	delete[] angle_offset;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, @@ -263,9 +221,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {  		if (dims.iRaysPerPixelDim > 1) -			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); +			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  		else -			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); +			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);  	}  	cudaThreadSynchronize(); @@ -278,7 +236,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,  bool BP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) +        const SDimensions& dims, const SParProjection* angles, float fOutputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -290,9 +248,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,  		bool ret;  		ret = BP_internal(D_volumeData, volumePitch,  		                  D_projData + iAngle * projPitch, projPitch, -		                  subdims, angles + iAngle, -		                  TOffsets ? TOffsets + iAngle : 0, -		                  fOutputScale); +		                  subdims, angles + iAngle, fOutputScale);  		if (!ret)  			return false;  	} @@ -303,25 +259,23 @@ bool BP(float* D_volumeData, unsigned int volumePitch,  bool BP_SART(float* D_volumeData, unsigned int volumePitch,               float* D_projData, unsigned int projPitch,               unsigned int angle, const SDimensions& dims, -             const float* angles, const float* TOffsets, float fOutputScale) +             const SParProjection* angles, float fOutputScale)  {  	// Only one angle.  	// We need to Clamp to the border pixels instead of to zero, because  	// SART weights with ray length.  	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); -	float angle_sin = sinf(angles[angle]); -	float angle_cos = cosf(angles[angle]); - -	float offset = 0.0f; -	if (TOffsets) -		offset = TOffsets[angle]; +	double d = angles[angle].fDetUX * angles[angle].fRayY - angles[angle].fDetUY * angles[angle].fRayX; +	float angle_scaled_cos = angles[angle].fRayY / d; +	float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs +	float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;  	dim3 dimBlock(g_blockSlices, g_blockSliceSize);  	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,  	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); -	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale); +	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale);  	cudaThreadSynchronize();  	cudaTextForceKernelsCompletion(); diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index 64bcd34..102cb2b 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -35,13 +35,13 @@ namespace astraCUDA {  _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, -        const float* TOffsets, float fOutputScale); +        const SDimensions& dims, const SParProjection* angles, +        float fOutputScale);  _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,               float* D_projData, unsigned int projPitch,               unsigned int angle, const SDimensions& dims, -             const float* angles, const float* TOffsets, float fOutputScale); +             const SParProjection* angles, float fOutputScale);  } diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu index bb8b909..3c010b3 100644 --- a/cuda/2d/par_fp.cu +++ b/cuda/2d/par_fp.cu @@ -30,6 +30,7 @@ $Id$  #include <cassert>  #include <iostream>  #include <list> +#include <cmath>  #include "util.h"  #include "arith.h" @@ -51,6 +52,7 @@ namespace astraCUDA {  static const unsigned g_MaxAngles = 2560;  __constant__ float gC_angle[g_MaxAngles];  __constant__ float gC_angle_offset[g_MaxAngles]; +__constant__ float gC_angle_detsize[g_MaxAngles];  // optimization parameters @@ -63,10 +65,6 @@ static const unsigned int g_blockSlices = 64;  #define iPREC_FACTOR 16 -// if necessary, a buffer of zeroes of size g_MaxAngles -static float* g_pfZeroes = 0; - -  static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned int pitch, unsigned int width, unsigned int height)  {  	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); @@ -89,9 +87,10 @@ static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned i  	return true;  } +  // projection for angles that are roughly horizontal -// theta between 45 and 135 degrees -__global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale) +// (detector roughly vertical) +__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)  {  	const int relDet = threadIdx.x;  	const int relAngle = threadIdx.y; @@ -106,33 +105,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned  	const float sin_theta = __sinf(theta);  	// compute start detector for this block/angle: -	// (The same for all threadIdx.x) -	// ------------------------------------- - -	const int midSlice = startSlice + g_blockSlices / 2; - -	// ASSUMPTION: fDetScale >= 1.0f -	// problem: detector regions get skipped because slice blocks aren't large -	// enough -	const unsigned int g_blockSliceSize = g_detBlockSize; - -	// project (midSlice,midRegion) on this thread's detector - -	const float fBase = 0.5f*dims.iProjDets - 0.5f + -		( -		    (midSlice - 0.5f*dims.iVolWidth + 0.5f) * cos_theta -		  - (g_blockSliceSize/2 - 0.5f*dims.iVolHeight + 0.5f) * sin_theta -		  + gC_angle_offset[angle] -		) / dims.fDetScale; -	int iBase = (int)floorf(fBase * fPREC_FACTOR); -	int iInc = (int)floorf(g_blockSliceSize * sin_theta / dims.fDetScale * -fPREC_FACTOR); - -	// ASSUMPTION: 16 > regionOffset / fDetScale -	const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; -	const int detPrevRegion = (iBase + (blockIdx.y - regionOffset - 1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - -	if (blockIdx.y > 0 && detRegion == detPrevRegion) -		return; +	const int detRegion = blockIdx.y;  	const int detector = detRegion * g_detBlockSize + relDet; @@ -142,7 +115,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned  	if (detector < 0 || detector >= dims.iProjDets)  		return; -	const float fDetStep = -dims.fDetScale / sin_theta; +	const float fDetStep = -gC_angle_detsize[angle] / sin_theta;  	float fSliceStep = cos_theta / sin_theta;  	float fDistCorr;  	if (sin_theta > 0.0f) @@ -192,9 +165,10 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned  	D_projData[angle*projPitch+detector] += fVal * fDistCorr;  } +  // projection for angles that are roughly vertical -// theta between 0 and 45, or 135 and 180 degrees -__global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale) +// (detector roughly horizontal) +__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)  {  	const int relDet = threadIdx.x;  	const int relAngle = threadIdx.y; @@ -209,33 +183,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i  	const float sin_theta = __sinf(theta);  	// compute start detector for this block/angle: -	// (The same for all threadIdx.x) -	// ------------------------------------- - -	const int midSlice = startSlice + g_blockSlices / 2; - -	// project (midSlice,midRegion) on this thread's detector - -	// ASSUMPTION: fDetScale >= 1.0f -	// problem: detector regions get skipped because slice blocks aren't large -	// enough -	const unsigned int g_blockSliceSize = g_detBlockSize; - -	const float fBase = 0.5f*dims.iProjDets - 0.5f + -		( -		    (g_blockSliceSize/2 - 0.5f*dims.iVolWidth + 0.5f) * cos_theta -		  - (midSlice - 0.5f*dims.iVolHeight + 0.5f) * sin_theta -		  + gC_angle_offset[angle] -		) / dims.fDetScale; -	int iBase = (int)floorf(fBase * fPREC_FACTOR); -	int iInc = (int)floorf(g_blockSliceSize * cos_theta / dims.fDetScale * fPREC_FACTOR); - -	// ASSUMPTION: 16 > regionOffset / fDetScale -	const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; -	const int detPrevRegion = (iBase + (blockIdx.y - regionOffset-1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - -	if (blockIdx.y > 0 && detRegion == detPrevRegion) -		return; +	const int detRegion = blockIdx.y;  	const int detector = detRegion * g_detBlockSize + relDet; @@ -245,7 +193,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i  	if (detector < 0 || detector >= dims.iProjDets)  		return; -	const float fDetStep = dims.fDetScale / cos_theta; +	const float fDetStep = gC_angle_detsize[angle] / cos_theta;  	float fSliceStep = sin_theta / cos_theta;  	float fDistCorr;  	if (cos_theta < 0.0f) @@ -293,183 +241,67 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i  	D_projData[angle*projPitch+detector] += fVal * fDistCorr;  } -// projection for angles that are roughly horizontal -// (detector roughly vertical) -__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) -{ -	const int relDet = threadIdx.x; -	const int relAngle = threadIdx.y; - -	int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle; - -	if (angle >= endAngle) -		return; - -	const float theta = gC_angle[angle]; -	const float cos_theta = __cosf(theta); -	const float sin_theta = __sinf(theta); - -	// compute start detector for this block/angle: -	const int detRegion = blockIdx.y; - -	const int detector = detRegion * g_detBlockSize + relDet; - -	// Now project the part of the ray to angle,detector through -	// slices startSlice to startSlice+g_blockSlices-1 - -	if (detector < 0 || detector >= dims.iProjDets) -		return; - -	const float fDetStep = -dims.fDetScale / sin_theta; -	float fSliceStep = cos_theta / sin_theta; -	float fDistCorr; -	if (sin_theta > 0.0f) -		fDistCorr = -fDetStep; -	else -		fDistCorr = fDetStep; -	fDistCorr *= outputScale; - -	float fVal = 0.0f; -	// project detector on slice -	float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolWidth + 0.5f) * fSliceStep + 0.5f*dims.iVolHeight - 0.5f + 0.5f; -	float fS = startSlice + 0.5f; -	int endSlice = startSlice + g_blockSlices; -	if (endSlice > dims.iVolWidth) -		endSlice = dims.iVolWidth; -	if (dims.iRaysPerDet > 1) { -		fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep; -		const float fSubDetStep = fDetStep / dims.iRaysPerDet; -		fDistCorr /= dims.iRaysPerDet; -		fSliceStep -= dims.iRaysPerDet * fSubDetStep; +// Coordinates of center of detector pixel number t: +// x = (t - 0.5*nDets + 0.5 - fOffset) * fSize * cos(fAngle) +// y = - (t - 0.5*nDets + 0.5 - fOffset) * fSize * sin(fAngle) -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) { -				fVal += tex2D(gT_volumeTexture, fS, fP); -				fP += fSubDetStep; -			} -			fP += fSliceStep; -			fS += 1.0f; -		} - -	} else { -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			fVal += tex2D(gT_volumeTexture, fS, fP); -			fP += fSliceStep; -			fS += 1.0f; -		} - - -	} - -	D_projData[angle*projPitch+detector] += fVal * fDistCorr; -} - - -// projection for angles that are roughly vertical -// (detector roughly horizontal) -__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) +static void convertAndUploadAngles(const SParProjection *projs, unsigned int nth, unsigned int ndets)  { -	const int relDet = threadIdx.x; -	const int relAngle = threadIdx.y; - -	int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle; - -	if (angle >= endAngle) -		return; - -	const float theta = gC_angle[angle]; -	const float cos_theta = __cosf(theta); -	const float sin_theta = __sinf(theta); - -	// compute start detector for this block/angle: -	const int detRegion = blockIdx.y; - -	const int detector = detRegion * g_detBlockSize + relDet; +	float *angles = new float[nth]; +	float *offset = new float[nth]; +	float *detsizes = new float[nth]; -	// Now project the part of the ray to angle,detector through -	// slices startSlice to startSlice+g_blockSlices-1 +	for (int i = 0; i < nth; ++i) { -	if (detector < 0 || detector >= dims.iProjDets) -		return; +#warning TODO: Replace by getParParamaters -	const float fDetStep = dims.fDetScale / cos_theta; -	float fSliceStep = sin_theta / cos_theta; -	float fDistCorr; -	if (cos_theta < 0.0f) -		fDistCorr = -fDetStep; -	else -		fDistCorr = fDetStep; -	fDistCorr *= outputScale; +		// Take part of DetU orthogonal to Ray +		double ux = projs[i].fDetUX; +		double uy = projs[i].fDetUY; -	float fVal = 0.0f; -	float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolHeight + 0.5f) * fSliceStep + 0.5f*dims.iVolWidth - 0.5f + 0.5f; -	float fS = startSlice+0.5f; -	int endSlice = startSlice + g_blockSlices; -	if (endSlice > dims.iVolHeight) -		endSlice = dims.iVolHeight; +		double t = (ux * projs[i].fRayX + uy * projs[i].fRayY) / (projs[i].fRayX * projs[i].fRayX + projs[i].fRayY * projs[i].fRayY); -	if (dims.iRaysPerDet > 1) { +		ux -= t * projs[i].fRayX; +		uy -= t * projs[i].fRayY; -		fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep; -		const float fSubDetStep = fDetStep / dims.iRaysPerDet; -		fDistCorr /= dims.iRaysPerDet; +		double angle = atan2(uy, ux); -		fSliceStep -= dims.iRaysPerDet * fSubDetStep; +		angles[i] = angle; -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) { -				fVal += tex2D(gT_volumeTexture, fP, fS); -				fP += fSubDetStep; -			} -			fP += fSliceStep; -			fS += 1.0f; -		} +		double norm2 = uy * uy + ux * ux; -	} else { - -		for (int slice = startSlice; slice < endSlice; ++slice) -		{ -			fVal += tex2D(gT_volumeTexture, fP, fS); -			fP += fSliceStep; -			fS += 1.0f; -		} +		detsizes[i] = sqrt(norm2); +		// CHECKME: SIGNS? +		offset[i] = -0.5*ndets - (projs[i].fDetSY*uy + projs[i].fDetSX*ux) / norm2;  	} -	D_projData[angle*projPitch+detector] += fVal * fDistCorr; +	cudaMemcpyToSymbol(gC_angle, angles, nth*sizeof(float), 0, cudaMemcpyHostToDevice);  +	cudaMemcpyToSymbol(gC_angle_offset, offset, nth*sizeof(float), 0, cudaMemcpyHostToDevice);  +	cudaMemcpyToSymbol(gC_angle_detsize, detsizes, nth*sizeof(float), 0, cudaMemcpyHostToDevice);   }  bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,                 float* D_projData, unsigned int projPitch, -               const SDimensions& dims, const float* angles, -               const float* TOffsets, float outputScale) +               const SDimensions& dims, const SParProjection* angles, +               float outputScale)  { -	// TODO: load angles into constant memory in smaller blocks  	assert(dims.iProjAngles <= g_MaxAngles); +	assert(angles); +  	cudaArray* D_dataArray;  	bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight); -	cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	if (TOffsets) { -		cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} else { -		if (!g_pfZeroes) { -			g_pfZeroes = new float[g_MaxAngles]; -			memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float)); -		} -		cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} +	convertAndUploadAngles(angles, dims.iProjAngles, dims.iProjDets); +  	dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // detector block size, angles @@ -492,7 +324,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,  		// Maybe we should detect corner cases and put them in the optimal  		// group of angles.  		if (a != dims.iProjAngles) -			vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a]))); +			vertical = (fabsf(angles[a].fRayX) <= fabsf(angles[a].fRayY));  		if (a == dims.iProjAngles || vertical != blockVertical) {  			// block done @@ -536,8 +368,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,  bool FP_simple(float* D_volumeData, unsigned int volumePitch,                 float* D_projData, unsigned int projPitch, -               const SDimensions& dims, const float* angles, -               const float* TOffsets, float outputScale) +               const SDimensions& dims, const SParProjection* angles, +               float outputScale)  {  	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {  		SDimensions subdims = dims; @@ -550,7 +382,7 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,  		ret = FP_simple_internal(D_volumeData, volumePitch,  		                         D_projData + iAngle * projPitch, projPitch,  		                         subdims, angles + iAngle, -		                         TOffsets ? TOffsets + iAngle : 0, outputScale); +		                         outputScale);  		if (!ret)  			return false;  	} @@ -559,106 +391,12 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,  bool FP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, -        const float* TOffsets, float outputScale) +        const SDimensions& dims, const SParProjection* angles, +        float outputScale)  {  	return FP_simple(D_volumeData, volumePitch, D_projData, projPitch, -	                 dims, angles, TOffsets, outputScale); - -	// TODO: Fix bug in this non-simple FP with large detscale and TOffsets -#if 0 - -	// TODO: load angles into constant memory in smaller blocks -	assert(dims.iProjAngles <= g_MaxAngles); - -	// TODO: compute region size dynamically to resolve these two assumptions -	// ASSUMPTION: 16 > regionOffset / fDetScale -	const unsigned int g_blockSliceSize = g_detBlockSize; -	assert(16 > (g_blockSlices / g_blockSliceSize) / dims.fDetScale); -	// ASSUMPTION: fDetScale >= 1.0f -	assert(dims.fDetScale > 0.9999f); - -	cudaArray* D_dataArray; -	bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight); - -	cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - -	if (TOffsets) { -		cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} else { -		if (!g_pfZeroes) { -			g_pfZeroes = new float[g_MaxAngles]; -			memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float)); -		} -		cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); -	} - -	int regionOffset = g_blockSlices / g_blockSliceSize; - -	dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // region size, angles - -	std::list<cudaStream_t> streams; - +	                 dims, angles, outputScale); -	// Run over all angles, grouping them into groups of the same -	// orientation (roughly horizontal vs. roughly vertical). -	// Start a stream of grids for each such group. - -	// TODO: Check if it's worth it to store this info instead -	// of recomputing it every FP. - -	unsigned int blockStart = 0; -	unsigned int blockEnd = 0; -	bool blockVertical = false; -	for (unsigned int a = 0; a <= dims.iProjAngles; ++a) { -		bool vertical; -		// TODO: Having <= instead of < below causes a 5% speedup. -		// Maybe we should detect corner cases and put them in the optimal -		// group of angles. -		if (a != dims.iProjAngles) -			vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a]))); -		if (a == dims.iProjAngles || vertical != blockVertical) { -			// block done - -			blockEnd = a; -			if (blockStart != blockEnd) { -				unsigned int length = dims.iVolHeight; -				if (blockVertical) -					length = dims.iVolWidth; -				dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock, -				             (length+g_blockSliceSize-1)/g_blockSliceSize+2*regionOffset); // angle blocks, regions -				// TODO: check if we can't immediately -				//       destroy the stream after use -				cudaStream_t stream; -				cudaStreamCreate(&stream); -				streams.push_back(stream); -				//printf("angle block: %d to %d, %d\n", blockStart, blockEnd, blockVertical); -				if (!blockVertical) -					for (unsigned int i = 0; i < dims.iVolWidth; i += g_blockSlices) -						FPhorizontal<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale); -				else -					for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices) -						FPvertical<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale); -			} -			blockVertical = vertical; -			blockStart = a; -		} -	} - -	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) -		cudaStreamDestroy(*iter); - -	streams.clear(); - -	cudaThreadSynchronize(); - -	cudaTextForceKernelsCompletion(); - -	cudaFreeArray(D_dataArray); -		 - -	return true; -#endif  } diff --git a/cuda/2d/par_fp.h b/cuda/2d/par_fp.h index 010d064..5fd593c 100644 --- a/cuda/2d/par_fp.h +++ b/cuda/2d/par_fp.h @@ -33,8 +33,8 @@ namespace astraCUDA {  _AstraExport bool FP(float* D_volumeData, unsigned int volumePitch,          float* D_projData, unsigned int projPitch, -        const SDimensions& dims, const float* angles, -        const float* TOffsets, float fOutputScale); +        const SDimensions& dims, const SParProjection* angles, +        float fOutputScale);  } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index c8608a3..a1085cd 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -254,10 +254,10 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,  {  	SDimensions d = dims;  	d.iProjAngles = 1; -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return FP(D_volumeData, volumePitch, D_projData, projPitch, -		          d, &angles[angle], TOffsets, outputScale); +		          d, &parProjs[angle], outputScale);  	} else {  		assert(fanProjs);  		return FanFP(D_volumeData, volumePitch, D_projData, projPitch, @@ -269,10 +269,10 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,                         float* D_projData, unsigned int projPitch,                         unsigned int angle, float outputScale)  { -	if (angles) { +	if (parProjs) {  		assert(!fanProjs);  		return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, -		               angle, dims, angles, TOffsets, outputScale); +		               angle, dims, parProjs, outputScale);  	} else {  		assert(fanProjs);  		return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index 4baaccb..9dc4f64 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -152,7 +152,7 @@ bool SIRT::precomputeWeights()  bool SIRT::doSlabCorrections()  {  	// This function compensates for effectively infinitely large slab-like -	// objects of finite thickness 1. +	// objects of finite thickness 1 in a parallel beam geometry.  	// Each ray through the object has an intersection of length d/cos(alpha).  	// The length of the ray actually intersecting the reconstruction volume is @@ -170,6 +170,10 @@ bool SIRT::doSlabCorrections()  	if (useVolumeMask || useSinogramMask)  		return false; +	// Parallel-beam only +	if (!parProjs) +		return false; +  	// multiply by line weights  	processSino<opDiv>(D_sinoData, D_lineWeight, projPitch, dims); @@ -181,7 +185,10 @@ bool SIRT::doSlabCorrections()  	float bound = cosf(1.3963f);  	float* t = (float*)D_sinoData;  	for (int i = 0; i < dims.iProjAngles; ++i) { -		float f = fabs(cosf(angles[i])); +		// TODO: Checkme +		// TODO: Replace by getParParameters +		double angle = atan2(parProjs[i].fRayX, -parProjs[i].fRayY); +		float f = fabs(cosf(angle));  		if (f < bound)  			f = bound; @@ -189,7 +196,6 @@ bool SIRT::doSlabCorrections()  		processSino<opMul>(t, f, sinoPitch, subdims);  		t += sinoPitch;  	} -  	return true;  } @@ -299,40 +305,6 @@ float SIRT::computeDiffNorm()  } -bool doSIRT(float* D_volumeData, unsigned int volumePitch, -            float* D_sinoData, unsigned int sinoPitch, -            float* D_maskData, unsigned int maskPitch, -            const SDimensions& dims, const float* angles, -            const float* TOffsets, unsigned int iterations) -{ -	SIRT sirt; -	bool ok = true; - -	ok &= sirt.setGeometry(dims, angles); -	if (D_maskData) -		ok &= sirt.enableVolumeMask(); -	if (TOffsets) -		ok &= sirt.setTOffsets(TOffsets); - -	if (!ok) -		return false; - -	ok = sirt.init(); -	if (!ok) -		return false; - -	if (D_maskData) -		ok &= sirt.setVolumeMask(D_maskData, maskPitch); - -	ok &= sirt.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); -	if (!ok) -		return false; - -	ok = sirt.iterate(iterations); - -	return ok; -} -  }  #ifdef STANDALONE diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 0e13be1..e7418a2 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -353,7 +353,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData,  	// The filtering is a regular ramp filter per detector line.  	int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); -	int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); +	int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount);  	int projPitch = D_projData.pitch/sizeof(float); @@ -361,22 +361,22 @@ bool FDK_Filter(cudaPitchedPtr D_projData,  	float* D_sinoData = (float*)D_projData.ptr;  	cufftComplex * D_sinoFFT = NULL; -	allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT); +	astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT);  	bool ok = true;  	for (int v = 0; v < dims.iProjV; ++v) { -		ok = runCudaFFT(dims.iProjAngles, D_sinoData, projPitch, +		ok = astraCUDA::runCudaFFT(dims.iProjAngles, D_sinoData, projPitch,  		                dims.iProjU, iPaddedDetCount, iHalfFFTSize,  		                D_sinoFFT);  		if (!ok) break; -		applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter); +		astraCUDA::applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter); -		ok = runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch, +		ok = astraCUDA::runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch,  		                 dims.iProjU, iPaddedDetCount, iHalfFFTSize);  		if (!ok) break; @@ -384,7 +384,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData,  		D_sinoData += (dims.iProjAngles * projPitch);  	} -	freeComplexOnDevice(D_sinoFFT); +	astraCUDA::freeComplexOnDevice(D_sinoFFT);  	return ok;  } @@ -401,7 +401,7 @@ bool FDK(cudaPitchedPtr D_volumeData,  	// TODO: Check errors  	cufftComplex * D_filter;  	int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); -	int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); +	int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount);  	ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin,  	                fSrcZ, fDetZ, fDetUSize, fDetVSize, @@ -412,11 +412,11 @@ bool FDK(cudaPitchedPtr D_volumeData,  	cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];  	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); -	genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize); +	astraCUDA::genFilter(astra::FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize); -	allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter); -	uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter); +	astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter); +	astraCUDA::uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter);  	delete [] pHostFilter; @@ -430,7 +430,7 @@ bool FDK(cudaPitchedPtr D_volumeData,  	                bShortScan, dims, angles);  	// Clean up filter -	freeComplexOnDevice(D_filter); +	astraCUDA::freeComplexOnDevice(D_filter);  	if (!ok) diff --git a/include/astra/CudaFilteredBackProjectionAlgorithm.h b/include/astra/CudaFilteredBackProjectionAlgorithm.h index cf1f19f..180e001 100644 --- a/include/astra/CudaFilteredBackProjectionAlgorithm.h +++ b/include/astra/CudaFilteredBackProjectionAlgorithm.h @@ -26,28 +26,24 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.  $Id$  */ -#ifndef CUDAFILTEREDBACKPROJECTIONALGORITHM2_H -#define CUDAFILTEREDBACKPROJECTIONALGORITHM2_H +#ifndef CUDAFILTEREDBACKPROJECTIONALGORITHM_H +#define CUDAFILTEREDBACKPROJECTIONALGORITHM_H  #include <astra/Float32ProjectionData2D.h>  #include <astra/Float32VolumeData2D.h> -#include <astra/ReconstructionAlgorithm2D.h> +#include <astra/CudaReconstructionAlgorithm2D.h>  #include "../../cuda/2d/astra.h"  namespace astra  { -class _AstraExport CCudaFilteredBackProjectionAlgorithm : public CReconstructionAlgorithm2D +class _AstraExport CCudaFilteredBackProjectionAlgorithm : public CCudaReconstructionAlgorithm2D  {  public:  	static std::string type;  private: -	CFloat32ProjectionData2D * m_pSinogram; -	CFloat32VolumeData2D * m_pReconstruction; -	int m_iGPUIndex; -	int m_iPixelSuperSampling;  	E_FBPFILTER m_eFilter;  	float * m_pfFilter;  	int m_iFilterWidth;	// number of elements per projection direction in filter @@ -64,15 +60,6 @@ public:  	virtual bool initialize(const Config& _cfg);  	bool initialize(CFloat32ProjectionData2D * _pSinogram, CFloat32VolumeData2D * _pReconstruction, E_FBPFILTER _eFilter, const float * _pfFilter = NULL, int _iFilterWidth = 0, int _iGPUIndex = -1, float _fFilterParameter = -1.0f); -	virtual void run(int _iNrIterations = 0); - -	static int calcIdealRealFilterWidth(int _iDetectorCount); -	static int calcIdealFourierFilterWidth(int _iDetectorCount); -	 -	//debug -	static void testGenFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount); -	static int getGPUCount(); -  	/** Get a description of the class.  	 *  	 * @return description string @@ -82,12 +69,7 @@ public:  protected:  	bool check(); -	AstraFBP* m_pFBP; - -	bool m_bAstraFBPInit; - -	void initializeFromProjector(); -	virtual bool requiresProjector() const { return false; } +	virtual void initCUDAAlgorithm();  };  // inline functions @@ -95,4 +77,4 @@ inline std::string CCudaFilteredBackProjectionAlgorithm::description() const { r  } -#endif /* CUDAFILTEREDBACKPROJECTIONALGORITHM2_H */ +#endif /* CUDAFILTEREDBACKPROJECTIONALGORITHM_H */ diff --git a/include/astra/GeometryUtil2D.h b/include/astra/GeometryUtil2D.h index 680cecd..2f03062 100644 --- a/include/astra/GeometryUtil2D.h +++ b/include/astra/GeometryUtil2D.h @@ -32,27 +32,72 @@ $Id$  namespace astra {  struct SParProjection { -    // the ray direction -    float fRayX, fRayY; +	// the ray direction +	float fRayX, fRayY; + +	// the start of the (linear) detector +	float fDetSX, fDetSY; + +	// the length of a single detector pixel +	float fDetUX, fDetUY; + + +	void translate(double dx, double dy) { +		fDetSX += dx; +		fDetSY += dy; +	} +	void scale(double factor) { +		fRayX *= factor; +		fRayY *= factor; +		fDetSX *= factor; +		fDetSY *= factor; +		fDetUX *= factor; +		fDetUY *= factor; +	} +}; -    // the start of the (linear) detector -    float fDetSX, fDetSY; -    // the length of a single detector pixel -    float fDetUX, fDetUY; +struct SFanProjection { +	// the source +	float fSrcX, fSrcY; + +	// the start of the (linear) detector +	float fDetSX, fDetSY; + +	// the length of a single detector pixel +	float fDetUX, fDetUY; + +	void translate(double dx, double dy) { +		fSrcX += dx; +		fSrcY += dy; +		fDetSX += dx; +		fDetSY += dy; +	} +	void scale(double factor) { +		fSrcX *= factor; +		fSrcY *= factor; +		fDetSX *= factor; +		fDetSY *= factor; +		fDetUX *= factor; +		fDetUY *= factor; +	}  }; -struct SFanProjection { -        // the source -        float fSrcX, fSrcY; -        // the start of the (linear) detector -        float fDetSX, fDetSY; +SParProjection* genParProjections(unsigned int iProjAngles, +                                  unsigned int iProjDets, +                                  double fDetSize, +                                  const float *pfAngles, +                                  const float *pfExtraOffsets); -        // the length of a single detector pixel -        float fDetUX, fDetUY; -}; +SFanProjection* genFanProjections(unsigned int iProjAngles, +                                  unsigned int iProjDets, +                                  double fOriginSource, double fOriginDetector, +                                  double fDetSize, +                                  const float *pfAngles); + +bool getFanParameters(const SFanProjection &proj, unsigned int iProjDets, float &fAngle, float &fOriginSource, float &fOriginDetector, float &fDetSize, float &fOffset);  } diff --git a/include/astra/ParallelProjectionGeometry2D.h b/include/astra/ParallelProjectionGeometry2D.h index 36b4b6f..ca67ba7 100644 --- a/include/astra/ParallelProjectionGeometry2D.h +++ b/include/astra/ParallelProjectionGeometry2D.h @@ -84,8 +84,7 @@ public:  	CParallelProjectionGeometry2D(int _iProjectionAngleCount,   								  int _iDetectorCount,   								  float32 _fDetectorWidth,  -								  const float32* _pfProjectionAngles, -								  const float32* _pfExtraDetectorOffsets = 0); +								  const float32* _pfProjectionAngles);  	/** Copy constructor.   	 */ @@ -117,8 +116,7 @@ public:  	bool initialize(int _iProjectionAngleCount,   					int _iDetectorCount,   					float32 _fDetectorWidth,  -					const float32* _pfProjectionAngles, -					const float32* _pfExtraDetectorOffsets = 0); +					const float32* _pfProjectionAngles);  	/** Create a hard copy.   	*/ diff --git a/include/astra/ProjectionGeometry2D.h b/include/astra/ProjectionGeometry2D.h index d26e6a7..b656d97 100644 --- a/include/astra/ProjectionGeometry2D.h +++ b/include/astra/ProjectionGeometry2D.h @@ -90,8 +90,7 @@ protected:  	CProjectionGeometry2D(int _iProjectionAngleCount,   						  int _iDetectorCount,   						  float32 _fDetectorWidth,  -						  const float32* _pfProjectionAngles, -						  const float32* _pfExtraDetectorOffsets = 0); +						  const float32* _pfProjectionAngles);  	/** Copy constructor.   	 */ @@ -117,8 +116,7 @@ protected:  	bool _initialize(int _iProjectionAngleCount,  					 int _iDetectorCount,  					 float32 _fDetectorWidth, -					 const float32* _pfProjectionAngles, -					 const float32* _pfExtraDetectorOffsets = 0); +					 const float32* _pfProjectionAngles);  public: diff --git a/samples/matlab/s014_FBP.m b/samples/matlab/s014_FBP.m index b73149c..faf91fb 100644 --- a/samples/matlab/s014_FBP.m +++ b/samples/matlab/s014_FBP.m @@ -9,7 +9,7 @@  % -----------------------------------------------------------------------  vol_geom = astra_create_vol_geom(256, 256); -proj_geom = astra_create_proj_geom('parallel', 1.0, 384, linspace2(0,pi,180)); +proj_geom = astra_create_proj_geom('fanflat', 1.0, 384, linspace2(0,2*pi,1800), 500, 0);  % As before, create a sinogram from a phantom  P = phantom(256); diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp index aa97eec..9d23253 100644 --- a/src/CudaFilteredBackProjectionAlgorithm.cpp +++ b/src/CudaFilteredBackProjectionAlgorithm.cpp @@ -33,6 +33,7 @@ $Id$  #include "astra/AstraObjectManager.h"  #include "astra/CudaProjector2D.h"  #include "../cuda/2d/astra.h" +#include "../cuda/2d/fbp.h"  #include "astra/Logging.h" @@ -44,8 +45,7 @@ string CCudaFilteredBackProjectionAlgorithm::type = "FBP_CUDA";  CCudaFilteredBackProjectionAlgorithm::CCudaFilteredBackProjectionAlgorithm()  {  	m_bIsInitialized = false; -	CReconstructionAlgorithm2D::_clear(); -	m_pFBP = 0; +	CCudaReconstructionAlgorithm2D::_clear();  	m_pfFilter = NULL;  	m_fFilterParameter = -1.0f;  	m_fFilterD = 1.0f; @@ -53,35 +53,8 @@ CCudaFilteredBackProjectionAlgorithm::CCudaFilteredBackProjectionAlgorithm()  CCudaFilteredBackProjectionAlgorithm::~CCudaFilteredBackProjectionAlgorithm()  { -	if(m_pfFilter != NULL) -	{ -		delete [] m_pfFilter; -		m_pfFilter = NULL; -	} - -	if(m_pFBP != NULL) -	{ -		delete m_pFBP; -		m_pFBP = NULL; -	} -} - -void CCudaFilteredBackProjectionAlgorithm::initializeFromProjector() -{ -	m_iPixelSuperSampling = 1; -	m_iGPUIndex = -1; - -	// Projector -	CCudaProjector2D* pCudaProjector = dynamic_cast<CCudaProjector2D*>(m_pProjector); -	if (!pCudaProjector) { -		if (m_pProjector) { -			ASTRA_WARN("non-CUDA Projector2D passed to FBP_CUDA"); -		} -	} else { -		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling(); -		m_iGPUIndex = pCudaProjector->getGPUIndex(); -	} - +	delete[] m_pfFilter; +	m_pfFilter = NULL;  }  bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg) @@ -92,54 +65,28 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)  	// if already initialized, clear first  	if (m_bIsInitialized)  	{ +#warning FIXME Necessary?  		clear();  	} -	// Projector -	XMLNode node = _cfg.self.getSingleNode("ProjectorId"); -	CCudaProjector2D* pCudaProjector = 0; -	if (node) { -		int id = node.getContentInt(); -		CProjector2D *projector = CProjector2DManager::getSingleton().get(id); -		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector); -		if (!pCudaProjector) { -			ASTRA_WARN("non-CUDA Projector2D passed"); -		} -	} -	CC.markNodeParsed("ProjectorId"); - - -	// sinogram data -	node = _cfg.self.getSingleNode("ProjectionDataId"); -	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ProjectionDataId tag specified."); -	int id = node.getContentInt(); -	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id)); -	CC.markNodeParsed("ProjectionDataId"); +	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_cfg); +	if (!m_bIsInitialized) +		return false; -	// reconstruction data -	node = _cfg.self.getSingleNode("ReconstructionDataId"); -	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ReconstructionDataId tag specified."); -	id = node.getContentInt(); -	m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id)); -	CC.markNodeParsed("ReconstructionDataId");  	// filter type -	node = _cfg.self.getSingleNode("FilterType"); +	XMLNode node = _cfg.self.getSingleNode("FilterType");  	if (node) -	{  		m_eFilter = _convertStringToFilter(node.getContent().c_str()); -	}  	else -	{  		m_eFilter = FILTER_RAMLAK; -	}  	CC.markNodeParsed("FilterType");  	// filter  	node = _cfg.self.getSingleNode("FilterSinogramId");  	if (node)  	{ -		id = node.getContentInt(); +		int id = node.getContentInt();  		const CFloat32ProjectionData2D * pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));  		m_iFilterWidth = pFilterData->getGeometry()->getDetectorCount();  		int iFilterProjectionCount = pFilterData->getGeometry()->getProjectionAngleCount(); @@ -188,20 +135,9 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)  	initializeFromProjector(); -	// Deprecated options -	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling); -	CC.markOptionParsed("PixelSuperSampling"); -	// GPU number -	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1); -	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex); -	CC.markOptionParsed("GPUIndex"); -	if (!_cfg.self.hasOption("GPUIndex")) -		CC.markOptionParsed("GPUindex"); - - -	m_pFBP = new AstraFBP; -	m_bAstraFBPInit = false; +	m_pAlgo = new astraCUDA::FBP(); +	m_bAlgoInit = false;  	return check();  } @@ -226,9 +162,8 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D *  	// success  	m_bIsInitialized = true; -	m_pFBP = new AstraFBP; - -	m_bAstraFBPInit = false; +	m_pAlgo = new astraCUDA::FBP(); +	m_bAlgoInit = false;  	if(_pfFilter != NULL)  	{ @@ -256,107 +191,26 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D *  	return check();  } -void CCudaFilteredBackProjectionAlgorithm::run(int _iNrIterations /* = 0 */) -{ -	// check initialized -	ASTRA_ASSERT(m_bIsInitialized); - -	if (!m_bAstraFBPInit) { - -		const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry(); -		const CParallelProjectionGeometry2D* parprojgeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry()); -		const CFanFlatProjectionGeometry2D* fanprojgeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry()); - -		bool ok = true; - -		// TODO: non-square pixels? -		ok &= m_pFBP->setReconstructionGeometry(volgeom.getGridColCount(), -		                                         volgeom.getGridRowCount(), -		                                         volgeom.getPixelLengthX()); -		int iDetectorCount; -		if (parprojgeom) { - -			float *offsets, *angles, detSize, outputScale; - -			ok = convertAstraGeometry(&volgeom, parprojgeom, offsets, angles, detSize, outputScale); - - -			ok &= m_pFBP->setProjectionGeometry(parprojgeom->getProjectionAngleCount(), -			                                     parprojgeom->getDetectorCount(), -			                                     angles, -			                                     parprojgeom->getDetectorWidth()); -			iDetectorCount = parprojgeom->getDetectorCount(); -			// TODO: Are detSize and outputScale handled correctly? - -			if (offsets) -				ok &= m_pFBP->setTOffsets(offsets); -			ASTRA_ASSERT(ok); - -			delete[] offsets; -			delete[] angles; - -		} else if (fanprojgeom) { - -			astraCUDA::SFanProjection* projs; -			float outputScale; - -			// FIXME: Implement this, and clean up the interface to AstraFBP. -			if (abs(volgeom.getWindowMinX() + volgeom.getWindowMaxX()) > 0.00001 * volgeom.getPixelLengthX()) { -				// Off-center volume geometry isn't supported yet -				ASTRA_ASSERT(false); -			} -			if (abs(volgeom.getWindowMinY() + volgeom.getWindowMaxY()) > 0.00001 * volgeom.getPixelLengthY()) { -				// Off-center volume geometry isn't supported yet -				ASTRA_ASSERT(false); -			} - -			ok = convertAstraGeometry(&volgeom, fanprojgeom, projs, outputScale); - -			// CHECKME: outputScale? - -			ok &= m_pFBP->setFanGeometry(fanprojgeom->getProjectionAngleCount(), -			                             fanprojgeom->getDetectorCount(), -			                             projs, -			                             fanprojgeom->getProjectionAngles(), -			                             fanprojgeom->getOriginSourceDistance(), -			                             fanprojgeom->getOriginDetectorDistance(), - -		                                     fanprojgeom->getDetectorWidth(), -			                             m_bShortScan); - -			iDetectorCount = fanprojgeom->getDetectorCount(); - -			delete[] projs; -		} else { -			assert(false); -		} - -		ok &= m_pFBP->setPixelSuperSampling(m_iPixelSuperSampling); - -		ASTRA_ASSERT(ok); +void CCudaFilteredBackProjectionAlgorithm::initCUDAAlgorithm() +{ +	CCudaReconstructionAlgorithm2D::initCUDAAlgorithm(); -		ok &= m_pFBP->init(m_iGPUIndex); -		ASTRA_ASSERT(ok); +	astraCUDA::FBP* pFBP = dynamic_cast<astraCUDA::FBP*>(m_pAlgo); -		ok &= m_pFBP->setSinogram(m_pSinogram->getDataConst(), iDetectorCount); +	bool ok = pFBP->setFilter(m_eFilter, m_pfFilter, m_iFilterWidth, m_fFilterD, m_fFilterParameter); +	if (!ok) { +		ASTRA_ERROR("CCudaFilteredBackProjectionAlgorithm: Failed to set filter");  		ASTRA_ASSERT(ok); - -		ok &= m_pFBP->setFilter(m_eFilter, m_pfFilter, m_iFilterWidth, m_fFilterD, m_fFilterParameter); -		ASTRA_ASSERT(ok); - -		m_bAstraFBPInit = true;  	} -	bool ok = m_pFBP->run(); -	ASTRA_ASSERT(ok); - -	const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry(); -	ok &= m_pFBP->getReconstruction(m_pReconstruction->getData(), volgeom.getGridColCount()); - -	ASTRA_ASSERT(ok); +	ok &= pFBP->setShortScan(m_bShortScan); +	if (!ok) { +		ASTRA_ERROR("CCudaFilteredBackProjectionAlgorithm: Failed to set short-scan mode"); +	}  } +  bool CCudaFilteredBackProjectionAlgorithm::check()  {  	// check pointers @@ -395,16 +249,6 @@ static int calcNextPowerOfTwo(int _iValue)  	return iOutput;  } -int CCudaFilteredBackProjectionAlgorithm::calcIdealRealFilterWidth(int _iDetectorCount) -{ -	return calcNextPowerOfTwo(_iDetectorCount); -} - -int CCudaFilteredBackProjectionAlgorithm::calcIdealFourierFilterWidth(int _iDetectorCount) -{ -	return (calcNextPowerOfTwo(_iDetectorCount) / 2 + 1); -} -  static bool stringCompareLowerCase(const char * _stringA, const char * _stringB)  {  	int iCmpReturn = 0; @@ -518,12 +362,3 @@ E_FBPFILTER CCudaFilteredBackProjectionAlgorithm::_convertStringToFilter(const c  	return output;  } -void CCudaFilteredBackProjectionAlgorithm::testGenFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount) -{ -	genFilter(_eFilter, _fD, _iProjectionCount, _pFilter, _iFFTRealDetectorCount, _iFFTFourierDetectorCount); -} - -int CCudaFilteredBackProjectionAlgorithm::getGPUCount() -{ -	return 0; -} diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp index 80f2e02..bdd7dd0 100644 --- a/src/CudaForwardProjectionAlgorithm.cpp +++ b/src/CudaForwardProjectionAlgorithm.cpp @@ -221,56 +221,48 @@ void CCudaForwardProjectionAlgorithm::run(int)  	// check initialized  	assert(m_bIsInitialized); -	CVolumeGeometry2D* pVolGeom = m_pVolume->getGeometry(); -	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry()); -	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry()); -	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<CFanFlatVecProjectionGeometry2D*>(m_pSinogram->getGeometry()); +	bool ok; -	bool ok = false; -	if (parProjGeom) { +	const CVolumeGeometry2D* pVolGeom = m_pVolume->getGeometry(); +	const CProjectionGeometry2D* pProjGeom = m_pSinogram->getGeometry(); +	astraCUDA::SDimensions dims; -		float *offsets, *angles, detSize, outputScale; -		ok = convertAstraGeometry(pVolGeom, parProjGeom, offsets, angles, detSize, outputScale); +	ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); -		ASTRA_ASSERT(ok); // FIXME +	if (!ok) +		return; -		// FIXME: Output scaling +	astraCUDA::SParProjection* pParProjs = 0; +	astraCUDA::SFanProjection* pFanProjs = 0; +	float fOutputScale = 1.0f; -		ok = astraCudaFP(m_pVolume->getDataConst(), m_pSinogram->getData(), -		                 pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), -		                 parProjGeom->getProjectionAngleCount(), -		                 parProjGeom->getDetectorCount(), -		                 angles, offsets, detSize, -		                 m_iDetectorSuperSampling, 1.0f * outputScale, m_iGPUIndex); - -		delete[] offsets; -		delete[] angles; +	ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pFanProjs, fOutputScale); +	if (!ok) +		return; -	} else if (fanProjGeom || fanVecProjGeom) { +	if (pParProjs) { +		assert(!pFanProjs); -		astraCUDA::SFanProjection* projs; -		float outputScale; +		ok = astraCudaFP(m_pVolume->getDataConst(), m_pSinogram->getData(), +		                 pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), +		                 pProjGeom->getProjectionAngleCount(), +		                 pProjGeom->getDetectorCount(), +		                 pParProjs, +		                 m_iDetectorSuperSampling, 1.0f * fOutputScale, m_iGPUIndex); -		if (fanProjGeom) { -			ok = convertAstraGeometry(pVolGeom, fanProjGeom, projs, outputScale); -		} else { -			ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, projs, outputScale); -		} +		delete[] pParProjs; -		ASTRA_ASSERT(ok); +	} else { +		assert(pFanProjs);  		ok = astraCudaFanFP(m_pVolume->getDataConst(), m_pSinogram->getData(),  		                    pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), -		                    m_pSinogram->getGeometry()->getProjectionAngleCount(), -		                    m_pSinogram->getGeometry()->getDetectorCount(), -		                    projs, -		                    m_iDetectorSuperSampling, outputScale, m_iGPUIndex); - -		delete[] projs; - -	} else { +		                    pProjGeom->getProjectionAngleCount(), +		                    pProjGeom->getDetectorCount(), +		                    pFanProjs, +		                    m_iDetectorSuperSampling, fOutputScale, m_iGPUIndex); -		ASTRA_ASSERT(false); +		delete[] pFanProjs;  	} diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp index 2798434..326ef1e 100644 --- a/src/CudaReconstructionAlgorithm2D.cpp +++ b/src/CudaReconstructionAlgorithm2D.cpp @@ -250,69 +250,14 @@ bool CCudaReconstructionAlgorithm2D::setupGeometry()  	ok = m_pAlgo->setGPUIndex(m_iGPUIndex);  	if (!ok) return false; -	astraCUDA::SDimensions dims; -  	const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry(); +	const CProjectionGeometry2D& projgeom = *m_pSinogram->getGeometry(); -	// TODO: non-square pixels? -	dims.iVolWidth = volgeom.getGridColCount(); -	dims.iVolHeight = volgeom.getGridRowCount(); -	float fPixelSize = volgeom.getPixelLengthX(); - -	dims.iRaysPerDet = m_iDetectorSuperSampling; -	dims.iRaysPerPixelDim = m_iPixelSuperSampling; - - -	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry()); -	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry()); -	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<CFanFlatVecProjectionGeometry2D*>(m_pSinogram->getGeometry()); - -	if (parProjGeom) { - -		float *offsets, *angles, detSize, outputScale; - -		ok = convertAstraGeometry(&volgeom, parProjGeom, offsets, angles, detSize, outputScale); - -		dims.iProjAngles = parProjGeom->getProjectionAngleCount(); -		dims.iProjDets = parProjGeom->getDetectorCount(); -		dims.fDetScale = parProjGeom->getDetectorWidth() / fPixelSize; - -		ok = m_pAlgo->setGeometry(dims, parProjGeom->getProjectionAngles()); -		ok &= m_pAlgo->setTOffsets(offsets); - -		// CHECKME: outputScale? detSize? - -		delete[] offsets; -		delete[] angles; - -	} else if (fanProjGeom || fanVecProjGeom) { - -		astraCUDA::SFanProjection* projs; -		float outputScale; - -		if (fanProjGeom) { -			ok = convertAstraGeometry(&volgeom, fanProjGeom, projs, outputScale); -		} else { -			ok = convertAstraGeometry(&volgeom, fanVecProjGeom, projs, outputScale); -		} - -		dims.iProjAngles = m_pSinogram->getGeometry()->getProjectionAngleCount(); -		dims.iProjDets = m_pSinogram->getGeometry()->getDetectorCount(); -		dims.fDetScale = m_pSinogram->getGeometry()->getDetectorWidth() / fPixelSize; - -		ok = m_pAlgo->setFanGeometry(dims, projs); - -		// CHECKME: outputScale? - -		delete[] projs; - -	} else { - -		ASTRA_ASSERT(false); - -	} +	ok = m_pAlgo->setGeometry(&volgeom, &projgeom);  	if (!ok) return false; +	ok = m_pAlgo->setSuperSampling(m_iDetectorSuperSampling, m_iPixelSuperSampling); +	if (!ok) return false;  	if (m_bUseReconstructionMask)  		ok &= m_pAlgo->enableVolumeMask(); diff --git a/src/FanFlatProjectionGeometry2D.cpp b/src/FanFlatProjectionGeometry2D.cpp index 3aab582..4eec9c4 100644 --- a/src/FanFlatProjectionGeometry2D.cpp +++ b/src/FanFlatProjectionGeometry2D.cpp @@ -28,6 +28,8 @@ $Id$  #include "astra/FanFlatProjectionGeometry2D.h" +#include "astra/GeometryUtil2D.h" +  #include <cstring>  #include <sstream> @@ -218,16 +220,12 @@ Config* CFanFlatProjectionGeometry2D::getConfiguration() const  //----------------------------------------------------------------------------------------  CFanFlatVecProjectionGeometry2D* CFanFlatProjectionGeometry2D::toVectorGeometry()  { -	SFanProjection* vectors = new SFanProjection[m_iProjectionAngleCount]; -	for (int i = 0; i < m_iProjectionAngleCount; ++i) -	{ -		vectors[i].fSrcX =  sinf(m_pfProjectionAngles[i]) * m_fOriginSourceDistance; -		vectors[i].fSrcY = -cosf(m_pfProjectionAngles[i]) * m_fOriginSourceDistance; -		vectors[i].fDetUX = cosf(m_pfProjectionAngles[i]) * m_fDetectorWidth; -		vectors[i].fDetUY = sinf(m_pfProjectionAngles[i]) * m_fDetectorWidth; -		vectors[i].fDetSX = -sinf(m_pfProjectionAngles[i]) * m_fOriginDetectorDistance - 0.5f * m_iDetectorCount * vectors[i].fDetUX; -		vectors[i].fDetSY =  cosf(m_pfProjectionAngles[i]) * m_fOriginDetectorDistance - 0.5f * m_iDetectorCount * vectors[i].fDetUY; -	} +	SFanProjection* vectors = genFanProjections(m_iProjectionAngleCount, +	                                            m_iDetectorCount, +	                                            m_fOriginSourceDistance, +	                                            m_fOriginDetectorDistance, +	                                            m_fDetectorWidth, +	                                            m_pfProjectionAngles);  	CFanFlatVecProjectionGeometry2D* vecGeom = new CFanFlatVecProjectionGeometry2D();  	vecGeom->initialize(m_iProjectionAngleCount, m_iDetectorCount, vectors); diff --git a/src/GeometryUtil2D.cpp b/src/GeometryUtil2D.cpp new file mode 100644 index 0000000..1bca2bc --- /dev/null +++ b/src/GeometryUtil2D.cpp @@ -0,0 +1,161 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +           2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "astra/GeometryUtil2D.h" + +#include <cmath> + +namespace astra { + +SParProjection* genParProjections(unsigned int iProjAngles, +                                  unsigned int iProjDets, +                                  double fDetSize, +                                  const float *pfAngles, +                                  const float *pfExtraOffsets) +{ +	SParProjection base; +	base.fRayX = 0.0f; +	base.fRayY = 1.0f; + +	base.fDetSX = iProjDets * fDetSize * -0.5f; +	base.fDetSY = 0.0f; + +	base.fDetUX = fDetSize; +	base.fDetUY = 0.0f; + +	SParProjection* p = new SParProjection[iProjAngles]; + +#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); } while(0) + +	for (unsigned int i = 0; i < iProjAngles; ++i) { +		if (pfExtraOffsets) { +			// TODO +		} + +		ROTATE0(Ray, i, pfAngles[i]); +		ROTATE0(DetS, i, pfAngles[i]); +		ROTATE0(DetU, i, pfAngles[i]); + +		if (pfExtraOffsets) { +			float d = pfExtraOffsets[i]; +			p[i].fDetSX -= d * p[i].fDetUX; +			p[i].fDetSY -= d * p[i].fDetUY; +		} +	} + +#undef ROTATE0 + +	return p; +} + + +SFanProjection* genFanProjections(unsigned int iProjAngles, +                                  unsigned int iProjDets, +                                  double fOriginSource, double fOriginDetector, +                                  double fDetSize, +                                  const float *pfAngles) +//                                  const float *pfExtraOffsets) +{ +	SFanProjection *pProjs = new SFanProjection[iProjAngles]; + +	float fSrcX0 = 0.0f; +	float fSrcY0 = -fOriginSource; +	float fDetUX0 = fDetSize; +	float fDetUY0 = 0.0f; +	float fDetSX0 = iProjDets * fDetUX0 / -2.0f; +	float fDetSY0 = fOriginDetector; + +#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0) +	for (unsigned int i = 0; i < iProjAngles; ++i) { +		ROTATE0(Src, i, pfAngles[i]); +		ROTATE0(DetS, i, pfAngles[i]); +		ROTATE0(DetU, i, pfAngles[i]); +	} + +#undef ROTATE0 + +	return pProjs; +} + +// Convert a SParProjection back into its set of "standard" circular parallel +// beam parameters. This is always possible. +bool getParParameters(const SParProjection &proj /* , ... */) +{ +	// angle +	// det size +	// offset + +	// (see convertAndUploadAngles in par_fp.cu) + +	return true; +} + +// Convert a SFanProjection back into its set of "standard" circular fan beam +// parameters. This will return false if it can not be represented in this way. +bool getFanParameters(const SFanProjection &proj, unsigned int iProjDets, float &fAngle, float &fOriginSource, float &fOriginDetector, float &fDetSize, float &fOffset) +{ +	// angle +	// det size +	// offset +	// origin-source +	// origin-detector + +	// Need to check if line source-origin is orthogonal to vector ux,uy +	// (including the case source==origin) + +	// (equivalent: source and origin project to same point on detector) + +	double dp = proj.fSrcX * proj.fDetUX + proj.fSrcY * proj.fDetUY; + +	double rel = (proj.fSrcX*proj.fSrcX + proj.fSrcY*proj.fSrcY) * (proj.fDetUX*proj.fDetUX + proj.fDetUY*proj.fDetUY); +	rel = sqrt(rel); + +	if (std::abs(dp) > rel * 0.0001) +		return false; + +	fOriginSource = sqrt(proj.fSrcX*proj.fSrcX + proj.fSrcY*proj.fSrcY); + +	fDetSize = sqrt(proj.fDetUX*proj.fDetUX + proj.fDetUY*proj.fDetUY); + +	// project origin on detector line ( == project source on detector line) + +	double t = (- proj.fDetSX) * proj.fDetUX + (- proj.fDetSY) * proj.fDetUY; + +	fOffset = (float)t - 0.5*iProjDets; + +	// TODO: CHECKME +	fOriginDetector = sqrt((proj.fDetSX + t * proj.fDetUX)*(proj.fDetSX + t * proj.fDetUX) + (proj.fDetSY + t * proj.fDetUY)*(proj.fDetSY + t * proj.fDetUY)); + +	//float fAngle = atan2(proj.fDetSX + t * proj.fDetUX - proj.fSrcX, proj.fDetSY + t * proj.fDetUY); // TODO: Fix order + sign +	fAngle = atan2(proj.fDetUY, proj.fDetUX); // TODO: Check order + sign + +	return true; +} + + +} diff --git a/src/ParallelProjectionGeometry2D.cpp b/src/ParallelProjectionGeometry2D.cpp index 105e24b..e1f93aa 100644 --- a/src/ParallelProjectionGeometry2D.cpp +++ b/src/ParallelProjectionGeometry2D.cpp @@ -28,6 +28,8 @@ $Id$  #include "astra/ParallelProjectionGeometry2D.h" +#include "astra/GeometryUtil2D.h" +  #include <cstring>  using namespace std; @@ -48,15 +50,13 @@ CParallelProjectionGeometry2D::CParallelProjectionGeometry2D() :  CParallelProjectionGeometry2D::CParallelProjectionGeometry2D(int _iProjectionAngleCount,   															 int _iDetectorCount,   															 float32 _fDetectorWidth,  -															 const float32* _pfProjectionAngles, -															 const float32* _pfExtraDetectorOffsets) +															 const float32* _pfProjectionAngles)  {  	_clear();  	initialize(_iProjectionAngleCount,  				_iDetectorCount,   				_fDetectorWidth,  -				_pfProjectionAngles, -				_pfExtraDetectorOffsets); +				_pfProjectionAngles);  }  //---------------------------------------------------------------------------------------- @@ -115,14 +115,12 @@ bool CParallelProjectionGeometry2D::initialize(const Config& _cfg)  bool CParallelProjectionGeometry2D::initialize(int _iProjectionAngleCount,   											   int _iDetectorCount,   											   float32 _fDetectorWidth,  -											   const float32* _pfProjectionAngles, -											   const float32* _pfExtraDetectorOffsets) +											   const float32* _pfProjectionAngles)  {  	_initialize(_iProjectionAngleCount,   			    _iDetectorCount,   			    _fDetectorWidth,  -			    _pfProjectionAngles, -				_pfExtraDetectorOffsets); +			    _pfProjectionAngles);  	// success  	m_bInitialized = _check(); @@ -178,11 +176,6 @@ Config* CParallelProjectionGeometry2D::getConfiguration() const  	cfg->self.addChildNode("DetectorCount", getDetectorCount());  	cfg->self.addChildNode("DetectorWidth", getDetectorWidth());  	cfg->self.addChildNode("ProjectionAngles", m_pfProjectionAngles, m_iProjectionAngleCount); -	if(m_pfExtraDetectorOffset!=NULL){ -		XMLNode opt = cfg->self.addChildNode("Option"); -		opt.addAttribute("key","ExtraDetectorOffset"); -		opt.setContent(m_pfExtraDetectorOffset, m_iProjectionAngleCount); -	}  	return cfg;  } @@ -203,17 +196,11 @@ CVector3D CParallelProjectionGeometry2D::getProjectionDirection(int _iProjection  //----------------------------------------------------------------------------------------  CParallelVecProjectionGeometry2D* CParallelProjectionGeometry2D::toVectorGeometry()  { -	SParProjection* vectors = new SParProjection[m_iProjectionAngleCount]; -	for (int i = 0; i < m_iProjectionAngleCount; ++i) -	{ -		vectors[i].fRayX = sinf(m_pfProjectionAngles[i]); -		vectors[i].fRayY = -cosf(m_pfProjectionAngles[i]); -		vectors[i].fDetUX = cosf(m_pfProjectionAngles[i]) * m_fDetectorWidth; -		vectors[i].fDetUY = sinf(m_pfProjectionAngles[i]) * m_fDetectorWidth;		 -		vectors[i].fDetSX = -0.5f * m_iDetectorCount * vectors[i].fDetUX; -		vectors[i].fDetSY = -0.5f * m_iDetectorCount * vectors[i].fDetUY; -	} - +	SParProjection* vectors = genParProjections(m_iProjectionAngleCount, +	                                            m_iDetectorCount, +	                                            m_fDetectorWidth, +	                                            m_pfProjectionAngles, 0); +	// TODO: ExtraOffsets?  	CParallelVecProjectionGeometry2D* vecGeom = new CParallelVecProjectionGeometry2D();  	vecGeom->initialize(m_iProjectionAngleCount, m_iDetectorCount, vectors);  	delete[] vectors; diff --git a/src/ProjectionGeometry2D.cpp b/src/ProjectionGeometry2D.cpp index 03fbd22..a306b48 100644 --- a/src/ProjectionGeometry2D.cpp +++ b/src/ProjectionGeometry2D.cpp @@ -45,11 +45,10 @@ CProjectionGeometry2D::CProjectionGeometry2D() : configCheckData(0)  CProjectionGeometry2D::CProjectionGeometry2D(int _iAngleCount,   											 int _iDetectorCount,   											 float32 _fDetectorWidth,  -											 const float32* _pfProjectionAngles, -											 const float32* _pfExtraDetectorOffsets) : configCheckData(0) +											 const float32* _pfProjectionAngles) : configCheckData(0)  {  	_clear(); -	_initialize(_iAngleCount, _iDetectorCount, _fDetectorWidth, _pfProjectionAngles,_pfExtraDetectorOffsets); +	_initialize(_iAngleCount, _iDetectorCount, _fDetectorWidth, _pfProjectionAngles);  }  //---------------------------------------------------------------------------------------- @@ -156,8 +155,7 @@ bool CProjectionGeometry2D::initialize(const Config& _cfg)  bool CProjectionGeometry2D::_initialize(int _iProjectionAngleCount,   									    int _iDetectorCount,   									    float32 _fDetectorWidth,  -									    const float32* _pfProjectionAngles, -										const float32* _pfExtraDetectorOffsets) +									    const float32* _pfProjectionAngles)  {  	if (m_bInitialized) {  		clear(); | 
