summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authordkazanc <dkazanc@hotmail.com>2019-03-07 17:52:57 +0000
committerdkazanc <dkazanc@hotmail.com>2019-03-07 17:52:57 +0000
commit47693d15132130513f8d0f74fd4831a3bbf69159 (patch)
tree98094bf413ffd608b0632e01195eec6cfbc8ff55 /src
parentcfcc4be4413f65a0b9c4ef197687e3a167eff0e8 (diff)
downloadregularization-47693d15132130513f8d0f74fd4831a3bbf69159.tar.gz
regularization-47693d15132130513f8d0f74fd4831a3bbf69159.tar.bz2
regularization-47693d15132130513f8d0f74fd4831a3bbf69159.tar.xz
regularization-47693d15132130513f8d0f74fd4831a3bbf69159.zip
matlab cmake fixed, rof tv eps
Diffstat (limited to 'src')
-rw-r--r--src/Core/regularisers_CPU/FGP_TV_core.c21
-rw-r--r--src/Core/regularisers_CPU/ROF_TV_core.c9
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_FGP_GPU_core.cu71
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_FGP_GPU_core.h2
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_ROF_GPU_core.cu156
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_ROF_GPU_core.h2
-rwxr-xr-xsrc/Core/regularisers_GPU/TV_SB_GPU_core.cu30
-rw-r--r--src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu30
-rw-r--r--src/Core/regularisers_GPU/shared.h9
-rwxr-xr-xsrc/Matlab/CMakeLists.txt6
-rw-r--r--src/Python/ccpi/filters/regularisers.py6
-rw-r--r--src/Python/setup-regularisers.py.in2
-rw-r--r--src/Python/src/gpu_regularisers.pyx72
13 files changed, 255 insertions, 161 deletions
diff --git a/src/Core/regularisers_CPU/FGP_TV_core.c b/src/Core/regularisers_CPU/FGP_TV_core.c
index 3c3cbd4..a6bb7e0 100644
--- a/src/Core/regularisers_CPU/FGP_TV_core.c
+++ b/src/Core/regularisers_CPU/FGP_TV_core.c
@@ -86,15 +86,17 @@ float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lamb
/* check early stopping criteria */
if (epsil != 0.0f) {
+ re = 0.0f; re1 = 0.0f;
for(j=0; j<DimTotal; j++)
{
- re += pow(Output[j] - Output_prev[j],2);
- re1 += pow(Output[j],2);
+ re += powf(Output[j] - Output_prev[j],2);
+ re1 += powf(Output[j],2);
}
- re = sqrt(re)/sqrt(re1);
- if (re < epsil) count++;
- if (count > 4) break;
- copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
+ re = sqrtf(re)/sqrtf(re1);
+ if (re < epsil) count++;
+ if (count > 4) break;
+
+ copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), 1l);
}
}
if (epsil != 0.0f) free(Output_prev);
@@ -137,12 +139,13 @@ float TV_FGP_CPU_main(float *Input, float *Output, float *infovector, float lamb
/* calculate norm - stopping rules*/
if (epsil != 0.0f) {
+ re = 0.0f; re1 = 0.0f;
for(j=0; j<DimTotal; j++)
{
- re += pow(Output[j] - Output_prev[j],2);
- re1 += pow(Output[j],2);
+ re += powf(Output[j] - Output_prev[j],2);
+ re1 += powf(Output[j],2);
}
- re = sqrt(re)/sqrt(re1);
+ re = sqrtf(re)/sqrtf(re1);
/* stop if the norm residual is less than the tolerance EPS */
if (re < epsil) count++;
if (count > 4) break;
diff --git a/src/Core/regularisers_CPU/ROF_TV_core.c b/src/Core/regularisers_CPU/ROF_TV_core.c
index 1b7cf76..a7291d7 100644
--- a/src/Core/regularisers_CPU/ROF_TV_core.c
+++ b/src/Core/regularisers_CPU/ROF_TV_core.c
@@ -19,7 +19,7 @@
#include "ROF_TV_core.h"
-#define EPS 1.0e-15
+#define EPS 1.0e-8
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
@@ -76,12 +76,13 @@ float TV_ROF_CPU_main(float *Input, float *Output, float *infovector, float lamb
/* check early stopping criteria */
if (epsil != 0.0f) {
+ re = 0.0f; re1 = 0.0f;
for(j=0; j<DimTotal; j++)
{
- re += pow(Output[j] - Output_prev[j],2);
- re1 += pow(Output[j],2);
+ re += powf(Output[j] - Output_prev[j],2);
+ re1 += powf(Output[j],2);
}
- re = sqrt(re)/sqrt(re1);
+ re = sqrtf(re)/sqrtf(re1);
if (re < epsil) count++;
if (count > 4) break;
copyIm(Output, Output_prev, (long)(dimX), (long)(dimY), (long)(dimZ));
diff --git a/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu b/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
index b371c5d..96b1173 100755
--- a/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
+++ b/src/Core/regularisers_GPU/TV_FGP_GPU_core.cu
@@ -19,6 +19,7 @@ limitations under the License.
#include "TV_FGP_GPU_core.h"
#include "shared.h"
+#include <thrust/functional.h>
#include <thrust/device_vector.h>
#include <thrust/transform_reduce.h>
@@ -31,10 +32,10 @@ limitations under the License.
* 4. eplsilon: tolerance constant
* 5. TV-type: methodTV - 'iso' (0) or 'l1' (1)
* 6. nonneg: 'nonnegativity (0 is OFF by default)
- * 7. print information: 0 (off) or 1 (on)
*
* Output:
- * [1] Filtered/regularized image
+ * [1] Filtered/regularized image/volume
+ * [2] Information vector which contains [iteration no., reached tolerance]
*
* This function is based on the Matlab's code and paper by
* [1] Amir Beck and Marc Teboulle, "Fast Gradient-Based Algorithms for Constrained Total Variation Image Denoising and Deblurring Problems"
@@ -49,7 +50,7 @@ limitations under the License.
#define BLKZSIZE 8
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+// struct square { __host__ __device__ float operator()(float x) { return x * x; } };
/************************************************/
/*****************2D modules*********************/
@@ -343,7 +344,7 @@ __global__ void FGPResidCalc3D_kernel(float *Input1, float *Input2, float* Outpu
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
////////////MAIN HOST FUNCTION ///////////////
-extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ)
+extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ)
{
int deviceCount = -1; // number of devices
cudaGetDeviceCount(&deviceCount);
@@ -354,20 +355,21 @@ extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int
int count = 0, i;
float re, multip,multip2;
- float tk = 1.0f;
+ re = 0.0f;
+ float tk = 1.0f;
float tkp1=1.0f;
if (dimZ <= 1) {
/*2D verson*/
- int ImSize = dimX*dimY;
- float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
+ int ImSize = dimX*dimY;
+ float *d_input, *d_update=NULL, *d_update_prev=NULL, *P1=NULL, *P2=NULL, *P1_prev=NULL, *P2_prev=NULL, *R1=NULL, *R2=NULL;
- dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
- dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
+ dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 dimGrid(idivup(dimX,BLKXSIZE2D), idivup(dimY,BLKYSIZE2D));
/*allocate space for images on device*/
- checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
- checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&d_input,ImSize*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&d_update,ImSize*sizeof(float)) );
if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
checkCudaErrors( cudaMalloc((void**)&P1,ImSize*sizeof(float)) );
checkCudaErrors( cudaMalloc((void**)&P2,ImSize*sizeof(float)) );
@@ -416,40 +418,43 @@ extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int
Rupd_func2D_kernel<<<dimGrid,dimBlock>>>(P1, P1_prev, P2, P2_prev, R1, R2, tkp1, tk, multip2, dimX, dimY, ImSize);
checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
+ FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+
+ tk = tkp1;
+
if (epsil != 0.0f) {
/* calculate norm - stopping rules using the Thrust library */
- FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1_prev, dimX, dimY, ImSize);
+ FGPResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, P1, dimX, dimY, ImSize);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
- thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
- float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+ // setup arguments
+ square<float> unary_op;
+ thrust::plus<float> binary_op;
+ thrust::device_vector<float> d_vec(P1, P1 + ImSize);
+ float reduction = std::sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), unary_op, 0.0f, binary_op));
thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
- float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
-
+ float reduction2 = std::sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), unary_op, 0.0f, binary_op));
+
+ // compute norm
re = (reduction/reduction2);
if (re < epsil) count++;
- if (count > 4) break;
+ if (count > 4) break;
FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
- }
-
- FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P1, P1_prev, dimX, dimY, ImSize);
- checkCudaErrors( cudaDeviceSynchronize() );
- checkCudaErrors(cudaPeekAtLastError() );
-
- FGPcopy_kernel2D<<<dimGrid,dimBlock>>>(P2, P2_prev, dimX, dimY, ImSize);
- checkCudaErrors( cudaDeviceSynchronize() );
- checkCudaErrors(cudaPeekAtLastError() );
-
- tk = tkp1;
- }
- if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
- /***************************************************************/
+ }
+
+ }
//copy result matrix from device to host memory
cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
@@ -542,7 +547,6 @@ extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int
tk = tkp1;
}
- if (printM == 1) printf("FGP-TV iterations stopped at iteration %i \n", i);
/***************************************************************/
//copy result matrix from device to host memory
cudaMemcpy(Output,d_update,ImSize*sizeof(float),cudaMemcpyDeviceToHost);
@@ -560,5 +564,8 @@ extern "C" int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int
cudaFree(R3);
}
//cudaDeviceReset();
+ /*adding info into info_vector */
+ infovector[0] = (float)(i); /*iterations number (if stopped earlier based on tolerance)*/
+ infovector[1] = re; /* reached tolerance */
return 0;
}
diff --git a/src/Core/regularisers_GPU/TV_FGP_GPU_core.h b/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
index bf13508..597b6c6 100755
--- a/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
+++ b/src/Core/regularisers_GPU/TV_FGP_GPU_core.h
@@ -4,6 +4,6 @@
#include "CCPiDefines.h"
#include <memory.h>
-extern "C" CCPI_EXPORT int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int dimX, int dimY, int dimZ);
+extern "C" CCPI_EXPORT int TV_FGP_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int dimX, int dimY, int dimZ);
#endif
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
index 76f5be9..97f2351 100755
--- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.cu
@@ -3,8 +3,8 @@ This work is part of the Core Imaging Library developed by
Visual Analytics and Imaging System Group of the Science Technology
Facilities Council, STFC
-Copyright 2017 Daniil Kazantsev
-Copyright 2017 Srikanth Nagella, Edoardo Pasca
+Copyright 2019 Daniil Kazantsev
+Copyright 2019 Srikanth Nagella, Edoardo Pasca
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -18,6 +18,10 @@ limitations under the License.
*/
#include "TV_ROF_GPU_core.h"
+#include "shared.h"
+#include <thrust/functional.h>
+#include <thrust/device_vector.h>
+#include <thrust/transform_reduce.h>
/* C-OMP implementation of ROF-TV denoising/regularization model [1] (2D/3D case)
*
@@ -26,16 +30,16 @@ limitations under the License.
* 2. lambda - regularization parameter [REQUIRED]
* 3. tau - marching step for explicit scheme, ~0.1 is recommended [REQUIRED]
* 4. Number of iterations, for explicit scheme >= 150 is recommended [REQUIRED]
-*
-* Output:
-* [1] Regularized image/volume
+* 5. eplsilon: tolerance constant
+ * Output:
+ * [1] Filtered/regularized image/volume
+ * [2] Information vector which contains [iteration no., reached tolerance]
+
+
* This function is based on the paper by
* [1] Rudin, Osher, Fatemi, "Nonlinear Total Variation based noise removal algorithms"
-*
-* D. Kazantsev, 2016-18
*/
-#include "shared.h"
#define BLKXSIZE 8
#define BLKYSIZE 8
@@ -43,7 +47,7 @@ limitations under the License.
#define BLKXSIZE2D 16
#define BLKYSIZE2D 16
-#define EPS 1.0e-12
+#define EPS 1.0e-8
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
@@ -136,7 +140,7 @@ __host__ __device__ int sign (float x)
dv1 = D1[index] - D1[j2*N + i];
dv2 = D2[index] - D2[j*N + i2];
- Update[index] += tau*(2.0f*lambda*(dv1 + dv2) - (Update[index] - Input[index]));
+ Update[index] += tau*(lambda*(dv1 + dv2) - (Update[index] - Input[index]));
}
}
@@ -286,42 +290,116 @@ __host__ __device__ int sign (float x)
dv2 = D2[index] - D2[(dimX*dimY)*k + j*dimX+i2];
dv3 = D3[index] - D3[(dimX*dimY)*k2 + j*dimX+i];
- Update[index] += tau*(2.0f*lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
+ Update[index] += tau*(lambda*(dv1 + dv2 + dv3) - (Update[index] - Input[index]));
}
}
+__global__ void ROFcopy_kernel2D(float *Input, float* Output, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ Output[index] = Input[index];
+ }
+}
+
+
+__global__ void ROFResidCalc2D_kernel(float *Input1, float *Input2, float* Output, int N, int M, int num_total)
+{
+ int xIndex = blockDim.x * blockIdx.x + threadIdx.x;
+ int yIndex = blockDim.y * blockIdx.y + threadIdx.y;
+
+ int index = xIndex + N*yIndex;
+
+ if (index < num_total) {
+ Output[index] = Input1[index] - Input2[index];
+ }
+}
/////////////////////////////////////////////////
-// HOST FUNCTION
-extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z)
+///////////////// HOST FUNCTION /////////////////
+extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z)
{
- // set up device
- int dev = 0;
- CHECK(cudaSetDevice(dev));
- float *d_input, *d_update, *d_D1, *d_D2;
+ int deviceCount = -1; // number of devices
+ cudaGetDeviceCount(&deviceCount);
+ if (deviceCount == 0) {
+ fprintf(stderr, "No CUDA devices found\n");
+ return -1;
+ }
+ float re;
+ re = 0.0f;
+ int ImSize, count, n;
+ count = 0; n = 0;
+ float *d_input, *d_update, *d_D1, *d_D2, *d_update_prev=NULL;
if (Z == 0) Z = 1;
- CHECK(cudaMalloc((void**)&d_input,N*M*Z*sizeof(float)));
- CHECK(cudaMalloc((void**)&d_update,N*M*Z*sizeof(float)));
- CHECK(cudaMalloc((void**)&d_D1,N*M*Z*sizeof(float)));
- CHECK(cudaMalloc((void**)&d_D2,N*M*Z*sizeof(float)));
+ ImSize = N*M*Z;
+ CHECK(cudaMalloc((void**)&d_input,ImSize*sizeof(float)));
+ CHECK(cudaMalloc((void**)&d_update,ImSize*sizeof(float)));
+ CHECK(cudaMalloc((void**)&d_D1,ImSize*sizeof(float)));
+ CHECK(cudaMalloc((void**)&d_D2,ImSize*sizeof(float)));
+ if (epsil != 0.0f) checkCudaErrors( cudaMalloc((void**)&d_update_prev,ImSize*sizeof(float)) );
- CHECK(cudaMemcpy(d_input,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
- CHECK(cudaMemcpy(d_update,Input,N*M*Z*sizeof(float),cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_input,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
+ CHECK(cudaMemcpy(d_update,Input,ImSize*sizeof(float),cudaMemcpyHostToDevice));
- if (Z > 1) {
- // TV - 3D case
+ if (Z == 1) {
+ // TV - 2D case
+ dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
+ dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
+
+ for(n=0; n < iter; n++) {
+ /* calculate differences */
+ D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
+ CHECK(cudaDeviceSynchronize());
+ D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
+ CHECK(cudaDeviceSynchronize());
+ /*running main kernel*/
+ TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
+ CHECK(cudaDeviceSynchronize());
+
+ if (epsil != 0.0f) {
+ /* calculate norm - stopping rules using the Thrust library */
+ ROFResidCalc2D_kernel<<<dimGrid,dimBlock>>>(d_update, d_update_prev, d_D1, N, M, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors( cudaPeekAtLastError() );
+
+ // setup arguments
+ square<float> unary_op;
+ thrust::plus<float> binary_op;
+ thrust::device_vector<float> d_vec(d_D1, d_D1 + ImSize);
+ float reduction = std::sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), unary_op, 0.0f, binary_op));
+ thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+ float reduction2 = std::sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), unary_op, 0.0f, binary_op));
+
+ // compute norm
+ re = (reduction/reduction2);
+ if (re < epsil) count++;
+ if (count > 4) break;
+
+ ROFcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, N, M, ImSize);
+ checkCudaErrors( cudaDeviceSynchronize() );
+ checkCudaErrors(cudaPeekAtLastError() );
+ }
+
+ }
+ }
+ else {
+ // TV - 3D case
dim3 dimBlock(BLKXSIZE,BLKYSIZE,BLKZSIZE);
dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE),idivup(Z,BLKXSIZE));
float *d_D3;
CHECK(cudaMalloc((void**)&d_D3,N*M*Z*sizeof(float)));
- for(int n=0; n < iter; n++) {
+ for(n=0; n < iter; n++) {
/* calculate differences */
D1_func3D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M, Z);
CHECK(cudaDeviceSynchronize());
- D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
+ D2_func3D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M, Z);
CHECK(cudaDeviceSynchronize());
D3_func3D<<<dimGrid,dimBlock>>>(d_update, d_D3, N, M, Z);
CHECK(cudaDeviceSynchronize());
@@ -331,28 +409,16 @@ extern "C" int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int
}
CHECK(cudaFree(d_D3));
- }
- else {
- // TV - 2D case
- dim3 dimBlock(BLKXSIZE2D,BLKYSIZE2D);
- dim3 dimGrid(idivup(N,BLKXSIZE2D), idivup(M,BLKYSIZE2D));
-
- for(int n=0; n < iter; n++) {
- /* calculate differences */
- D1_func2D<<<dimGrid,dimBlock>>>(d_update, d_D1, N, M);
- CHECK(cudaDeviceSynchronize());
- D2_func2D<<<dimGrid,dimBlock>>>(d_update, d_D2, N, M);
- CHECK(cudaDeviceSynchronize());
- /*running main kernel*/
- TV_kernel2D<<<dimGrid,dimBlock>>>(d_D1, d_D2, d_update, d_input, lambdaPar, tau, N, M);
- CHECK(cudaDeviceSynchronize());
- }
- }
+ }
CHECK(cudaMemcpy(Output,d_update,N*M*Z*sizeof(float),cudaMemcpyDeviceToHost));
+ if (epsil != 0.0f) cudaFree(d_update_prev);
CHECK(cudaFree(d_input));
CHECK(cudaFree(d_update));
CHECK(cudaFree(d_D1));
CHECK(cudaFree(d_D2));
- //cudaDeviceReset();
+
+ infovector[0] = (float)(n); /*iterations number (if stopped earlier based on tolerance)*/
+ infovector[1] = re; /* reached tolerance */
+
return 0;
}
diff --git a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
index 3a09296..0a75124 100755
--- a/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
+++ b/src/Core/regularisers_GPU/TV_ROF_GPU_core.h
@@ -3,6 +3,6 @@
#include "CCPiDefines.h"
#include <stdio.h>
-extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
+extern "C" CCPI_EXPORT int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z);
#endif
diff --git a/src/Core/regularisers_GPU/TV_SB_GPU_core.cu b/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
index 1f494ee..c31f104 100755
--- a/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
+++ b/src/Core/regularisers_GPU/TV_SB_GPU_core.cu
@@ -49,7 +49,7 @@ limitations under the License.
#define BLKZSIZE 8
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+// struct square { __host__ __device__ float operator()(float x) { return x * x; } };
/************************************************/
/*****************2D modules*********************/
@@ -430,14 +430,14 @@ extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, f
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
- thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
- float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
- thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);
- float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+// thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
+ // float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);
+ // float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
- re = (reduction/reduction2);
- if (re < epsil) count++;
- if (count > 4) break;
+ // re = (reduction/reduction2);
+ // if (re < epsil) count++;
+ // if (count > 4) break;
}
}
@@ -521,14 +521,14 @@ extern "C" int TV_SB_GPU_main(float *Input, float *Output, float mu, int iter, f
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
- thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
- float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
- thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);
- float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec(d_res, d_res + DimTotal);
+ // float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec2(d_update, d_update + DimTotal);
+// float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
- re = (reduction/reduction2);
- if (re < epsil) count++;
- if (count > 4) break;
+ // re = (reduction/reduction2);
+ // if (re < epsil) count++;
+ // if (count > 4) break;
}
}
if (printM == 1) printf("SB-TV iterations stopped at iteration %i \n", ll);
diff --git a/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
index 7503ec7..9db594e 100644
--- a/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
+++ b/src/Core/regularisers_GPU/dTV_FGP_GPU_core.cu
@@ -53,7 +53,7 @@ limitations under the License.
#define BLKZSIZE 8
#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
-struct square { __host__ __device__ float operator()(float x) { return x * x; } };
+//struct square { __host__ __device__ float operator()(float x) { return x * x; } };
/************************************************/
/*****************2D modules*********************/
@@ -552,14 +552,14 @@ extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, fl
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
- thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
- float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
- thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
- float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+ // float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+ // float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
- re = (reduction/reduction2);
- if (re < epsil) count++;
- if (count > 4) break;
+ // re = (reduction/reduction2);
+ // if (re < epsil) count++;
+ // if (count > 4) break;
dTVcopy_kernel2D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, ImSize);
checkCudaErrors( cudaDeviceSynchronize() );
@@ -686,14 +686,14 @@ extern "C" int dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, fl
checkCudaErrors( cudaDeviceSynchronize() );
checkCudaErrors(cudaPeekAtLastError() );
- thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
- float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
- thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
- float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec(P1_prev, P1_prev + ImSize);
+ // float reduction = sqrt(thrust::transform_reduce(d_vec.begin(), d_vec.end(), square(), 0.0f, thrust::plus<float>()));
+ // thrust::device_vector<float> d_vec2(d_update, d_update + ImSize);
+ // float reduction2 = sqrt(thrust::transform_reduce(d_vec2.begin(), d_vec2.end(), square(), 0.0f, thrust::plus<float>()));
- re = (reduction/reduction2);
- if (re < epsil) count++;
- if (count > 4) break;
+ // re = (reduction/reduction2);
+ // if (re < epsil) count++;
+ // if (count > 4) break;
dTVcopy_kernel3D<<<dimGrid,dimBlock>>>(d_update, d_update_prev, dimX, dimY, dimZ, ImSize);
checkCudaErrors( cudaDeviceSynchronize() );
diff --git a/src/Core/regularisers_GPU/shared.h b/src/Core/regularisers_GPU/shared.h
index fe98cd6..698dddd 100644
--- a/src/Core/regularisers_GPU/shared.h
+++ b/src/Core/regularisers_GPU/shared.h
@@ -1,4 +1,13 @@
/*shared macros*/
+template <typename T>
+struct square
+{
+ __host__ __device__
+ T operator()(const T& x) const {
+ return (float)(x*x);
+ }
+};
+
/*checks CUDA call, should be used in functions returning <int> value
diff --git a/src/Matlab/CMakeLists.txt b/src/Matlab/CMakeLists.txt
index b97f845..6c5e6be 100755
--- a/src/Matlab/CMakeLists.txt
+++ b/src/Matlab/CMakeLists.txt
@@ -38,9 +38,9 @@ find_package(Matlab REQUIRED COMPONENTS MAIN_PROGRAM MX_LIBRARY ENG_LIBRARY )
#set (MEX_TARGETS "CPU_TNV;CPU_ROF")
#list(APPEND MEX_TARGETS "CPU_TNV")
#list(APPEND MEX_TARGETS "CPU_ROF")
-
+
file(GLOB CPU_MEX_FILES
- "${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_CPU/*.c"
+ "${CMAKE_SOURCE_DIR}/src/Matlab/mex_compile/regularisers_CPU/*.c"
#"${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_GPU/*.c"
)
@@ -101,7 +101,7 @@ if (BUILD_CUDA)
find_package(CUDA)
if (CUDA_FOUND)
file(GLOB GPU_MEX_FILES
- "${CMAKE_SOURCE_DIR}/Matlab/mex_compile/regularisers_GPU/*.cpp"
+ "${CMAKE_SOURCE_DIR}/src/Matlab/mex_compile/regularisers_GPU/*.cpp"
)
list(LENGTH GPU_MEX_FILES num)
diff --git a/src/Python/ccpi/filters/regularisers.py b/src/Python/ccpi/filters/regularisers.py
index 67f432b..05120af 100644
--- a/src/Python/ccpi/filters/regularisers.py
+++ b/src/Python/ccpi/filters/regularisers.py
@@ -22,7 +22,8 @@ def ROF_TV(inputData, regularisation_parameter, iterations,
return TV_ROF_GPU(inputData,
regularisation_parameter,
iterations,
- time_marching_parameter)
+ time_marching_parameter,
+ tolerance_param)
else:
if not gpu_enabled and device == 'gpu':
raise ValueError ('GPU is not available')
@@ -44,8 +45,7 @@ def FGP_TV(inputData, regularisation_parameter,iterations,
iterations,
tolerance_param,
methodTV,
- nonneg,
- 1)
+ nonneg)
else:
if not gpu_enabled and device == 'gpu':
raise ValueError ('GPU is not available')
diff --git a/src/Python/setup-regularisers.py.in b/src/Python/setup-regularisers.py.in
index 39b820a..4c578e3 100644
--- a/src/Python/setup-regularisers.py.in
+++ b/src/Python/setup-regularisers.py.in
@@ -68,7 +68,7 @@ setup(
],
zip_safe = False,
- packages = {'ccpi','ccpi.filters', 'ccpi.supp'},
+ packages = {'ccpi', 'ccpi.filters', 'ccpi.supp'},
)
diff --git a/src/Python/src/gpu_regularisers.pyx b/src/Python/src/gpu_regularisers.pyx
index b52f669..db4fa67 100644
--- a/src/Python/src/gpu_regularisers.pyx
+++ b/src/Python/src/gpu_regularisers.pyx
@@ -20,8 +20,8 @@ cimport numpy as np
CUDAErrorMessage = 'CUDA error'
-cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float lambdaPar, int iter, float tau, int N, int M, int Z);
-cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int printM, int N, int M, int Z);
+cdef extern int TV_ROF_GPU_main(float* Input, float* Output, float *infovector, float lambdaPar, int iter, float tau, float epsil, int N, int M, int Z);
+cdef extern int TV_FGP_GPU_main(float *Input, float *Output, float *infovector, float lambdaPar, int iter, float epsil, int methodTV, int nonneg, int N, int M, int Z);
cdef extern int TV_SB_GPU_main(float *Input, float *Output, float lambdaPar, int iter, float epsil, int methodTV, int printM, int N, int M, int Z);
cdef extern int TGV_GPU_main(float *Input, float *Output, float lambdaPar, float alpha1, float alpha0, int iterationsNumb, float L2, int dimX, int dimY, int dimZ);
cdef extern int LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, float lambdaLLT, int iterationsNumb, float tau, int N, int M, int Z);
@@ -34,17 +34,20 @@ cdef extern int PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned
def TV_ROF_GPU(inputData,
regularisation_parameter,
iterations,
- time_marching_parameter):
+ time_marching_parameter,
+ tolerance_param):
if inputData.ndim == 2:
return ROFTV2D(inputData,
regularisation_parameter,
iterations,
- time_marching_parameter)
+ time_marching_parameter,
+ tolerance_param)
elif inputData.ndim == 3:
return ROFTV3D(inputData,
regularisation_parameter,
iterations,
- time_marching_parameter)
+ time_marching_parameter,
+ tolerance_param)
# Total-variation Fast-Gradient-Projection (FGP)
def TV_FGP_GPU(inputData,
@@ -52,24 +55,21 @@ def TV_FGP_GPU(inputData,
iterations,
tolerance_param,
methodTV,
- nonneg,
- printM):
+ nonneg):
if inputData.ndim == 2:
return FGPTV2D(inputData,
regularisation_parameter,
iterations,
tolerance_param,
methodTV,
- nonneg,
- printM)
+ nonneg)
elif inputData.ndim == 3:
return FGPTV3D(inputData,
regularisation_parameter,
iterations,
tolerance_param,
methodTV,
- nonneg,
- printM)
+ nonneg)
# Total-variation Split Bregman (SB)
def TV_SB_GPU(inputData,
regularisation_parameter,
@@ -179,7 +179,8 @@ def Diff4th_GPU(inputData,
def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
float regularisation_parameter,
int iterations,
- float time_marching_parameter):
+ float time_marching_parameter,
+ float tolerance_param):
cdef long dims[2]
dims[0] = inputData.shape[0]
@@ -187,22 +188,26 @@ def ROFTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
np.zeros([dims[0],dims[1]], dtype='float32')
+ cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
+ np.ones([2], dtype='float32')
# Running CUDA code here
if (TV_ROF_GPU_main(
- &inputData[0,0], &outputData[0,0],
+ &inputData[0,0], &outputData[0,0], &infovec[0],
regularisation_parameter,
- iterations ,
+ iterations,
time_marching_parameter,
+ tolerance_param,
dims[1], dims[0], 1)==0):
- return outputData;
+ return (outputData,infovec)
else:
raise ValueError(CUDAErrorMessage);
def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
int iterations,
- float time_marching_parameter):
+ float time_marching_parameter,
+ float tolerance_param):
cdef long dims[3]
dims[0] = inputData.shape[0]
@@ -211,15 +216,18 @@ def ROFTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+ cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
+ np.ones([2], dtype='float32')
# Running CUDA code here
if (TV_ROF_GPU_main(
- &inputData[0,0,0], &outputData[0,0,0],
+ &inputData[0,0,0], &outputData[0,0,0], &infovec[0],
regularisation_parameter,
- iterations ,
+ iterations,
time_marching_parameter,
+ tolerance_param,
dims[2], dims[1], dims[0])==0):
- return outputData;
+ return (outputData,infovec)
else:
raise ValueError(CUDAErrorMessage);
#****************************************************************#
@@ -231,8 +239,7 @@ def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
int iterations,
float tolerance_param,
int methodTV,
- int nonneg,
- int printM):
+ int nonneg):
cdef long dims[2]
dims[0] = inputData.shape[0]
@@ -240,47 +247,48 @@ def FGPTV2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData,
cdef np.ndarray[np.float32_t, ndim=2, mode="c"] outputData = \
np.zeros([dims[0],dims[1]], dtype='float32')
+ cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
+ np.ones([2], dtype='float32')
# Running CUDA code here
- if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0],
+ if (TV_FGP_GPU_main(&inputData[0,0], &outputData[0,0], &infovec[0],
regularisation_parameter,
- iterations,
+ iterations,
tolerance_param,
methodTV,
nonneg,
- printM,
dims[1], dims[0], 1)==0):
- return outputData;
+ return (outputData,infovec)
else:
raise ValueError(CUDAErrorMessage);
-
def FGPTV3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData,
float regularisation_parameter,
int iterations,
float tolerance_param,
int methodTV,
- int nonneg,
- int printM):
+ int nonneg):
cdef long dims[3]
dims[0] = inputData.shape[0]
dims[1] = inputData.shape[1]
dims[2] = inputData.shape[2]
+
cdef np.ndarray[np.float32_t, ndim=3, mode="c"] outputData = \
np.zeros([dims[0],dims[1],dims[2]], dtype='float32')
+ cdef np.ndarray[np.float32_t, ndim=1, mode="c"] infovec = \
+ np.ones([2], dtype='float32')
# Running CUDA code here
- if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0],
- regularisation_parameter ,
+ if (TV_FGP_GPU_main(&inputData[0,0,0], &outputData[0,0,0], &infovec[0],
+ regularisation_parameter,
iterations,
tolerance_param,
methodTV,
nonneg,
- printM,
dims[2], dims[1], dims[0])==0):
- return outputData;
+ return (outputData,infovec)
else:
raise ValueError(CUDAErrorMessage);