diff options
-rw-r--r-- | cuda/3d/mem3d.h | 2 | ||||
-rw-r--r-- | include/astra/CompositeGeometryManager.h | 16 | ||||
-rw-r--r-- | matlab/mex/astra_mex_c.cpp | 45 | ||||
-rw-r--r-- | python/astra/__init__.py | 8 | ||||
-rw-r--r-- | python/astra/astra.py | 4 | ||||
-rw-r--r-- | python/astra/astra_c.pyx | 21 | ||||
-rw-r--r-- | samples/matlab/s020_3d_multiGPU.m | 38 | ||||
-rw-r--r-- | samples/python/s019_experimental_multires.py (renamed from samples/python/s018_experimental_multires.py) | 0 | ||||
-rw-r--r-- | samples/python/s020_3d_multiGPU.py | 57 | ||||
-rw-r--r-- | src/CompositeGeometryManager.cpp | 476 | ||||
-rw-r--r-- | src/ConeProjectionGeometry3D.cpp | 3 |
11 files changed, 518 insertions, 152 deletions
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 82bad19..acb72cb 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -87,6 +87,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool freeGPUMemory(MemHandle3D handle); +bool setGPUIndex(int index); + bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 6610151..49d02a7 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -50,9 +50,16 @@ class CProjectionGeometry3D; class CProjector3D; +struct SGPUParams { + std::vector<int> GPUIndices; + size_t memory; +}; + class _AstraExport CCompositeGeometryManager { public: + CCompositeGeometryManager(); + class CPart; typedef std::list<boost::shared_ptr<CPart> > TPartList; class CPart { @@ -139,10 +146,19 @@ public: bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); + void setGPUIndices(const std::vector<int>& GPUIndices); + + static void setGlobalGPUParams(const SGPUParams& params); + protected: bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split); + std::vector<int> m_GPUIndices; + size_t m_iMaxSize; + + + static SGPUParams* s_params; }; } diff --git a/matlab/mex/astra_mex_c.cpp b/matlab/mex/astra_mex_c.cpp index d34334c..fdf4f33 100644 --- a/matlab/mex/astra_mex_c.cpp +++ b/matlab/mex/astra_mex_c.cpp @@ -38,6 +38,7 @@ $Id$ #include "astra/Globals.h" #ifdef ASTRA_CUDA #include "../cuda/2d/darthelper.h" +#include "astra/CompositeGeometryManager.h" #endif using namespace std; using namespace astra; @@ -83,12 +84,46 @@ void astra_mex_use_cuda(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs * Set active GPU */ void astra_mex_set_gpu_index(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) -{ +{ #ifdef ASTRA_CUDA - if (nrhs >= 2) { - bool ret = astraCUDA::setGPUIndex((int)mxGetScalar(prhs[1])); - if (!ret) - mexPrintf("Failed to set GPU %d\n", (int)mxGetScalar(prhs[1])); + bool usage = false; + if (nrhs != 2 && nrhs != 4) { + usage = true; + } + + astra::SGPUParams params; + params.memory = 0; + + if (!usage && nrhs >= 4) { + std::string s = mexToString(prhs[2]); + if (s != "memory") { + usage = true; + } else { + params.memory = (size_t)mxGetScalar(prhs[3]); + } + } + + if (!usage && nrhs >= 2) { + int n = mxGetN(prhs[1]) * mxGetM(prhs[1]); + params.GPUIndices.resize(n); + double* pdMatlabData = mxGetPr(prhs[1]); + for (int i = 0; i < n; ++i) + params.GPUIndices[i] = (int)pdMatlabData[i]; + + + astra::CCompositeGeometryManager::setGlobalGPUParams(params); + + + // Set first GPU + if (n >= 1) { + bool ret = astraCUDA::setGPUIndex((int)pdMatlabData[0]); + if (!ret) + mexPrintf("Failed to set GPU %d\n", (int)pdMatlabData[0]); + } + } + + if (usage) { + mexPrintf("Usage: astra_mex('set_gpu_index', index/indices [, 'memory', memory])"); } #endif } diff --git a/python/astra/__init__.py b/python/astra/__init__.py index 10ed74d..515d9a2 100644 --- a/python/astra/__init__.py +++ b/python/astra/__init__.py @@ -39,7 +39,7 @@ from . import log from .optomo import OpTomo import os -try: - astra.set_gpu_index(int(os.environ['ASTRA_GPU_INDEX'])) -except KeyError: - pass + +if 'ASTRA_GPU_INDEX' in os.environ: + L = [ int(x) for x in os.environ['ASTRA_GPU_INDEX'].split(',') ] + astra.set_gpu_index(L) diff --git a/python/astra/astra.py b/python/astra/astra.py index 26b1ff0..9328b6b 100644 --- a/python/astra/astra.py +++ b/python/astra/astra.py @@ -49,10 +49,10 @@ def version(printToScreen=False): """ return a.version(printToScreen) -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): """Set default GPU index to use. :param idx: GPU index :type idx: :class:`int` """ - a.set_gpu_index(idx) + a.set_gpu_index(idx, memory) diff --git a/python/astra/astra_c.pyx b/python/astra/astra_c.pyx index 6b246b6..2a9c816 100644 --- a/python/astra/astra_c.pyx +++ b/python/astra/astra_c.pyx @@ -31,6 +31,7 @@ import six from .utils import wrap_from_bytes from libcpp.string cimport string +from libcpp.vector cimport vector from libcpp cimport bool cdef extern from "astra/Globals.h" namespace "astra": int getVersion() @@ -43,6 +44,12 @@ IF HAVE_CUDA==True: ELSE: def setGPUIndex(): pass +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra": + cdef cppclass SGPUParams: + vector[int] GPUIndices + size_t memory +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra::CCompositeGeometryManager": + void setGlobalGPUParams(SGPUParams&) def credits(): six.print_("""The ASTRA Toolbox has been developed at the University of Antwerp and CWI, Amsterdam by @@ -70,8 +77,16 @@ def version(printToScreen=False): else: return getVersion() -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): + import types + import collections + cdef SGPUParams params if use_cuda()==True: - ret = setGPUIndex(idx) + if not isinstance(idx, collections.Iterable) or isinstance(idx, types.StringTypes): + idx = (idx,) + params.memory = memory + params.GPUIndices = idx + setGlobalGPUParams(params) + ret = setGPUIndex(params.GPUIndices[0]) if not ret: - six.print_("Failed to set GPU " + str(idx)) + six.print_("Failed to set GPU " + str(params.GPUIndices[0])) diff --git a/samples/matlab/s020_3d_multiGPU.m b/samples/matlab/s020_3d_multiGPU.m new file mode 100644 index 0000000..bade325 --- /dev/null +++ b/samples/matlab/s020_3d_multiGPU.m @@ -0,0 +1,38 @@ +% ----------------------------------------------------------------------- +% This file is part of the ASTRA Toolbox +% +% Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +% 2014-2015, CWI, Amsterdam +% License: Open Source under GPLv3 +% Contact: astra@uantwerpen.be +% Website: http://sf.net/projects/astra-toolbox +% ----------------------------------------------------------------------- + + +% Set up multi-GPU usage. +% This only works for 3D GPU forward projection and back projection. +astra_mex('set_gpu_index', [0 1]); + +% Optionally, you can also restrict the amount of GPU memory ASTRA will use. +% The line commented below sets this to 1GB. +%astra_mex('set_gpu_index', [0 1], 'memory', 1024*1024*1024); + +vol_geom = astra_create_vol_geom(1024, 1024, 1024); + +angles = linspace2(0, pi, 1024); +proj_geom = astra_create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles); + +% Create a simple hollow cube phantom +cube = zeros(1024,1024,1024); +cube(129:896,129:896,129:896) = 1; +cube(257:768,257:768,257:768) = 0; + +% Create projection data from this +[proj_id, proj_data] = astra_create_sino3d_cuda(cube, proj_geom, vol_geom); + +% Backproject projection data +[bproj_id, bproj_data] = astra_create_backprojection3d_cuda(proj_data, proj_geom, vol_geom); + +astra_mex_data3d('delete', proj_id); +astra_mex_data3d('delete', bproj_id); + diff --git a/samples/python/s018_experimental_multires.py b/samples/python/s019_experimental_multires.py index cf38e53..cf38e53 100644 --- a/samples/python/s018_experimental_multires.py +++ b/samples/python/s019_experimental_multires.py diff --git a/samples/python/s020_3d_multiGPU.py b/samples/python/s020_3d_multiGPU.py new file mode 100644 index 0000000..d6799c4 --- /dev/null +++ b/samples/python/s020_3d_multiGPU.py @@ -0,0 +1,57 @@ +#----------------------------------------------------------------------- +#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam +# +#Author: Daniel M. Pelt +#Contact: D.M.Pelt@cwi.nl +#Website: http://dmpelt.github.io/pyastratoolbox/ +# +# +#This file is part of the Python interface to the +#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). +# +#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation, either version 3 of the License, or +#(at your option) any later version. +# +#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with the Python interface to the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. +# +#----------------------------------------------------------------------- + +import astra +import numpy as np + +# Set up multi-GPU usage. +# This only works for 3D GPU forward projection and back projection. +astra.astra.set_gpu_index([0,1]) + +# Optionally, you can also restrict the amount of GPU memory ASTRA will use. +# The line commented below sets this to 1GB. +#astra.astra.set_gpu_index([0,1], memory=1024*1024*1024) + +vol_geom = astra.create_vol_geom(1024, 1024, 1024) + +angles = np.linspace(0, np.pi, 1024,False) +proj_geom = astra.create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles) + +# Create a simple hollow cube phantom +cube = np.zeros((1024,1024,1024)) +cube[128:895,128:895,128:895] = 1 +cube[256:767,256:767,256:767] = 0 + +# Create projection data from this +proj_id, proj_data = astra.create_sino3d_gpu(cube, proj_geom, vol_geom) + +# Backproject projection data +bproj_id, bproj_data = astra.create_backprojection3d_gpu(proj_data, proj_geom, vol_geom) + +# Clean up. Note that GPU memory is tied up in the algorithm object, +# and main RAM in the data objects. +astra.data3d.delete(proj_id) +astra.data3d.delete(bproj_id) diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 41f6319..d1b713e 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -44,11 +44,31 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. #include "../cuda/3d/mem3d.h" #include <cstring> +#include <sstream> + +#ifndef USE_PTHREADS +#include <boost/thread/mutex.hpp> +#include <boost/thread.hpp> +#endif namespace astra { + +SGPUParams* CCompositeGeometryManager::s_params = 0; + +CCompositeGeometryManager::CCompositeGeometryManager() +{ + m_iMaxSize = 0; + + if (s_params) { + m_iMaxSize = s_params->memory; + m_GPUIndices = s_params->GPUIndices; + } +} + + // JOB: -// +// // VolumePart // ProjectionPart // FP-or-BP @@ -76,7 +96,6 @@ namespace astra { // (First approach: 0.5/0.5) - bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) { split.clear(); @@ -305,37 +324,37 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce static size_t ceildiv(size_t a, size_t b) { - return (a + b - 1) / b; + return (a + b - 1) / b; } static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount) { - size_t blockSize = maxBlock; - size_t blockCount = ceildiv(sliceCount, blockSize); + size_t blockSize = maxBlock; + size_t blockCount = ceildiv(sliceCount, blockSize); - // Increase number of blocks to be divisible by div - size_t divCount = div * ceildiv(blockCount, div); + // Increase number of blocks to be divisible by div + size_t divCount = div * ceildiv(blockCount, div); - // If divCount is above sqrt(number of slices), then - // we can't guarantee divisibility by div, but let's try anyway - if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { - blockCount = divCount; - } else { - // If divisibility isn't achievable, we may want to optimize - // differently. - // TODO: Figure out how to model and optimize this. - } + // If divCount is above sqrt(number of slices), then + // we can't guarantee divisibility by div, but let's try anyway + if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { + blockCount = divCount; + } else { + // If divisibility isn't achievable, we may want to optimize + // differently. + // TODO: Figure out how to model and optimize this. + } - // Final adjustment to make blocks more evenly sized - // (This can't make the blocks larger) - blockSize = ceildiv(sliceCount, blockCount); + // Final adjustment to make blocks more evenly sized + // (This can't make the blocks larger) + blockSize = ceildiv(sliceCount, blockCount); - ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); + ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); - assert(blockSize <= maxBlock); - assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); + assert(blockSize <= maxBlock); + assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); - return blockSize; + return blockSize; } template<class V, class P> @@ -848,6 +867,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector +static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter) +{ + CCompositeGeometryManager::CPart* output = iter->first; + const CCompositeGeometryManager::TJobList& L = iter->second; + + assert(!L.empty()); + + bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; + + size_t outx, outy, outz; + output->getDims(outx, outy, outz); + + if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) { + // just zero output? + if (zero) { + for (size_t z = 0; z < outz; ++z) { + for (size_t y = 0; y < outy; ++y) { + float* ptr = output->pData->getData(); + ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); + ptr += (y + output->subY) * (size_t)output->pData->getWidth(); + ptr += output->subX; + memset(ptr, 0, sizeof(float) * outx); + } + } + } + return true; + } + + + astraCUDA3d::SSubDimensions3D dstdims; + dstdims.nx = output->pData->getWidth(); + dstdims.pitch = dstdims.nx; + dstdims.ny = output->pData->getHeight(); + dstdims.nz = output->pData->getDepth(); + dstdims.subnx = outx; + dstdims.subny = outy; + dstdims.subnz = outz; + ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); + dstdims.subx = output->subX; + dstdims.suby = output->subY; + dstdims.subz = output->subZ; + float *dst = output->pData->getData(); + + astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); + bool ok = outputMem; + + for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) { + const CCompositeGeometryManager::SJob &j = *i; + + assert(j.pInput); + + CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); + Cuda3DProjectionKernel projKernel = ker3d_default; + int detectorSuperSampling = 1; + int voxelSuperSampling = 1; + if (projector) { + projKernel = projector->getProjectionKernel(); + detectorSuperSampling = projector->getDetectorSuperSampling(); + voxelSuperSampling = projector->getVoxelSuperSampling(); + } + + size_t inx, iny, inz; + j.pInput->getDims(inx, iny, inz); + astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); + + astraCUDA3d::SSubDimensions3D srcdims; + srcdims.nx = j.pInput->pData->getWidth(); + srcdims.pitch = srcdims.nx; + srcdims.ny = j.pInput->pData->getHeight(); + srcdims.nz = j.pInput->pData->getDepth(); + srcdims.subnx = inx; + srcdims.subny = iny; + srcdims.subnz = inz; + srcdims.subx = j.pInput->subX; + srcdims.suby = j.pInput->subY; + srcdims.subz = j.pInput->subZ; + const float *src = j.pInput->pData->getDataConst(); + + ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); + if (!ok) ASTRA_ERROR("Error copying input data to GPU"); + + if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { + assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get())); + assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); + + ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); + if (!ok) ASTRA_ERROR("Error performing sub-FP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); + } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { + assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); + assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); + + ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); + if (!ok) ASTRA_ERROR("Error performing sub-BP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); + } else { + assert(false); + } + + ok = astraCUDA3d::freeGPUMemory(inputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + } + + ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); + if (!ok) ASTRA_ERROR("Error copying output data from GPU"); + + ok = astraCUDA3d::freeGPUMemory(outputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + return true; +} + + +class WorkQueue { +public: + WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) { +#ifdef USE_PTHREADS + pthread_mutex_init(&m_mutex, 0); +#endif + m_iter = m_jobs.begin(); + } + bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) { + lock(); + + if (m_iter == m_jobs.end()) { + unlock(); + return false; + } + + i = m_iter++; + + unlock(); + + return true; + } +#ifdef USE_PTHREADS + void lock() { + // TODO: check mutex op return values + pthread_mutex_lock(&m_mutex); + } + void unlock() { + // TODO: check mutex op return values + pthread_mutex_unlock(&m_mutex); + } +#else + void lock() { + m_mutex.lock(); + } + void unlock() { + m_mutex.unlock(); + } +#endif + +private: + CCompositeGeometryManager::TJobSet &m_jobs; + CCompositeGeometryManager::TJobSet::const_iterator m_iter; +#ifdef USE_PTHREADS + pthread_mutex_t m_mutex; +#else + boost::mutex m_mutex; +#endif +}; + +struct WorkThreadInfo { + WorkQueue* m_queue; + unsigned int m_iGPU; +}; + +#ifndef USE_PTHREADS + +void runEntries_boost(WorkThreadInfo* info) +{ + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + CCompositeGeometryManager::TJobSet::const_iterator i; + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + boost::this_thread::interruption_point(); + doJob(i); + boost::this_thread::interruption_point(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); +} + + +#else + +void* runEntries_pthreads(void* data) { + WorkThreadInfo* info = (WorkThreadInfo*)data; + + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + + CCompositeGeometryManager::TJobSet::const_iterator i; + + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + pthread_testcancel(); + doJob(i); + pthread_testcancel(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); + + return 0; +} + +#endif + + +void runWorkQueue(WorkQueue &queue, const std::vector<int> & iGPUIndices) { + int iThreadCount = iGPUIndices.size(); + + std::vector<WorkThreadInfo> infos; +#ifdef USE_PTHREADS + std::vector<pthread_t> threads; +#else + std::vector<boost::thread*> threads; +#endif + infos.resize(iThreadCount); + threads.resize(iThreadCount); + + for (int i = 0; i < iThreadCount; ++i) { + infos[i].m_queue = &queue; + infos[i].m_iGPU = iGPUIndices[i]; +#ifdef USE_PTHREADS + pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); +#else + threads[i] = new boost::thread(runEntries_boost, &infos[i]); +#endif + } + + // Wait for them to finish + for (int i = 0; i < iThreadCount; ++i) { +#ifdef USE_PTHREADS + pthread_join(threads[i], 0); +#else + threads[i]->join(); + delete threads[i]; + threads[i] = 0; +#endif + } +} + + +void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices) +{ + m_GPUIndices = GPUIndices; +} + bool CCompositeGeometryManager::doJobs(TJobList &jobs) { ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); @@ -859,140 +1132,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) jobset[i->pOutput.get()].push_back(*i); } - size_t maxSize = astraCUDA3d::availableGPUMemory(); + size_t maxSize = m_iMaxSize; if (maxSize == 0) { - ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); - maxSize = 1024 * 1024 * 1024; + // Get memory from first GPU. Not optimal... + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); + maxSize = astraCUDA3d::availableGPUMemory(); + if (maxSize == 0) { + ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); + maxSize = 1024 * 1024 * 1024; + } else { + ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + } } else { - ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize); } maxSize = (maxSize * 9) / 10; maxSize /= sizeof(float); int div = 1; - - // TODO: Multi-GPU support + if (!m_GPUIndices.empty()) + div = m_GPUIndices.size(); // Split jobs to fit TJobSet split; splitJobs(jobset, maxSize, div, split); jobset.clear(); - // Run jobs - - for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) { - - CPart* output = iter->first; - TJobList& L = iter->second; - - assert(!L.empty()); + if (m_GPUIndices.size() <= 1) { - bool zero = L.begin()->eMode == SJob::MODE_SET; + // Run jobs + ASTRA_DEBUG("Running single-threaded"); - size_t outx, outy, outz; - output->getDims(outx, outy, outz); + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); - if (L.begin()->eType == SJob::JOB_NOP) { - // just zero output? - if (zero) { - for (size_t z = 0; z < outz; ++z) { - for (size_t y = 0; y < outy; ++y) { - float* ptr = output->pData->getData(); - ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); - ptr += (y + output->subY) * (size_t)output->pData->getWidth(); - ptr += output->subX; - memset(ptr, 0, sizeof(float) * outx); - } - } - } - continue; + for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) { + doJob(iter); } + } else { - astraCUDA3d::SSubDimensions3D dstdims; - dstdims.nx = output->pData->getWidth(); - dstdims.pitch = dstdims.nx; - dstdims.ny = output->pData->getHeight(); - dstdims.nz = output->pData->getDepth(); - dstdims.subnx = outx; - dstdims.subny = outy; - dstdims.subnz = outz; - ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); - dstdims.subx = output->subX; - dstdims.suby = output->subY; - dstdims.subz = output->subZ; - float *dst = output->pData->getData(); - - astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); - bool ok = outputMem; - - for (TJobList::iterator i = L.begin(); i != L.end(); ++i) { - SJob &j = *i; - - assert(j.pInput); - - CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); - Cuda3DProjectionKernel projKernel = ker3d_default; - int detectorSuperSampling = 1; - int voxelSuperSampling = 1; - if (projector) { - projKernel = projector->getProjectionKernel(); - detectorSuperSampling = projector->getDetectorSuperSampling(); - voxelSuperSampling = projector->getVoxelSuperSampling(); - } - - size_t inx, iny, inz; - j.pInput->getDims(inx, iny, inz); - astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); - - astraCUDA3d::SSubDimensions3D srcdims; - srcdims.nx = j.pInput->pData->getWidth(); - srcdims.pitch = srcdims.nx; - srcdims.ny = j.pInput->pData->getHeight(); - srcdims.nz = j.pInput->pData->getDepth(); - srcdims.subnx = inx; - srcdims.subny = iny; - srcdims.subnz = inz; - srcdims.subx = j.pInput->subX; - srcdims.suby = j.pInput->subY; - srcdims.subz = j.pInput->subZ; - const float *src = j.pInput->pData->getDataConst(); - - ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); - if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - - if (j.eType == SJob::JOB_FP) { - assert(dynamic_cast<CVolumePart*>(j.pInput.get())); - assert(dynamic_cast<CProjectionPart*>(j.pOutput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); - - ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); - if (!ok) ASTRA_ERROR("Error performing sub-FP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); - } else if (j.eType == SJob::JOB_BP) { - assert(dynamic_cast<CVolumePart*>(j.pOutput.get())); - assert(dynamic_cast<CProjectionPart*>(j.pInput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); - - ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); - if (!ok) ASTRA_ERROR("Error performing sub-BP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); - } else { - assert(false); - } + ASTRA_DEBUG("Running multi-threaded"); - ok = astraCUDA3d::freeGPUMemory(inputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + WorkQueue wq(split); - } + runWorkQueue(wq, m_GPUIndices); - ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); - if (!ok) ASTRA_ERROR("Error copying output data from GPU"); - - ok = astraCUDA3d::freeGPUMemory(outputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); } return true; @@ -1000,6 +1186,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) + +//static +void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params) +{ + delete s_params; + + s_params = new SGPUParams; + *s_params = params; + + ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:"); + std::ostringstream s; + s << "GPU indices:"; + for (unsigned int i = 0; i < params.GPUIndices.size(); ++i) + s << " " << params.GPUIndices[i]; + std::string ss = s.str(); + ASTRA_DEBUG(ss.c_str()); + ASTRA_DEBUG("Memory: %llu", params.memory); +} + + } #endif diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp index 99b4bf4..96b04fb 100644 --- a/src/ConeProjectionGeometry3D.cpp +++ b/src/ConeProjectionGeometry3D.cpp @@ -256,9 +256,6 @@ void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, // Scale fS to detector plane fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD ); - - ASTRA_DEBUG("alpha: %f, D: %f, V: %f, S: %f, U: %f", alpha, fD, fV, fS, fU); - } void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, |