diff options
author | Daniil Kazantsev <dkazanc3@googlemail.com> | 2018-12-04 16:18:38 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-04 16:18:38 +0000 |
commit | b6140c987875ff415b225f8800c4b582295db654 (patch) | |
tree | 5074abd308c3e2f4425ee27251d242f3273f1dd4 | |
parent | 8b8dfc68fa6b70ec7eefcdfb928fb383196bec97 (diff) | |
parent | c9ee9ecc84881595b04f19280c93bcd587171270 (diff) | |
download | regularization-b6140c987875ff415b225f8800c4b582295db654.tar.gz regularization-b6140c987875ff415b225f8800c4b582295db654.tar.bz2 regularization-b6140c987875ff415b225f8800c4b582295db654.tar.xz regularization-b6140c987875ff415b225f8800c4b582295db654.zip |
Merge pull request #77 from vais-ral/nltv_gpu
NLTV for GPU
-rw-r--r-- | Core/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Core/regularisers_CPU/PatchSelect_core.c | 100 | ||||
-rw-r--r-- | Core/regularisers_GPU/PatchSelect_GPU_core.cu | 468 | ||||
-rw-r--r-- | Core/regularisers_GPU/PatchSelect_GPU_core.h | 8 | ||||
-rw-r--r-- | Readme.md | 2 | ||||
-rw-r--r-- | Wrappers/Matlab/demos/demoMatlab_denoise.m | 10 | ||||
-rw-r--r-- | Wrappers/Python/ccpi/filters/regularisers.py | 8 | ||||
-rwxr-xr-x | Wrappers/Python/conda-recipe/run_test.py | 2 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_regularisers.py | 18 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py | 55 | ||||
-rw-r--r-- | Wrappers/Python/demos/demo_gpu_regularisers.py | 74 | ||||
-rw-r--r-- | Wrappers/Python/setup-regularisers.py.in | 1 | ||||
-rw-r--r-- | Wrappers/Python/src/cpu_regularisers.pyx | 6 | ||||
-rw-r--r-- | Wrappers/Python/src/gpu_regularisers.pyx | 32 |
14 files changed, 716 insertions, 69 deletions
diff --git a/Core/CMakeLists.txt b/Core/CMakeLists.txt index df01bb7..d92db82 100644 --- a/Core/CMakeLists.txt +++ b/Core/CMakeLists.txt @@ -120,6 +120,7 @@ if (BUILD_CUDA) ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/dTV_FGP_GPU_core.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/NonlDiff_GPU_core.cu ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/Diffus_4thO_GPU_core.cu + ${CMAKE_CURRENT_SOURCE_DIR}/regularisers_GPU/PatchSelect_GPU_core.cu ) if (UNIX) message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") diff --git a/Core/regularisers_CPU/PatchSelect_core.c b/Core/regularisers_CPU/PatchSelect_core.c index 45cf8c8..cf5cdc7 100644 --- a/Core/regularisers_CPU/PatchSelect_core.c +++ b/Core/regularisers_CPU/PatchSelect_core.c @@ -44,6 +44,19 @@ * 4. Weights_ijk - associated weights */ +void swap(float *xp, float *yp) +{ + float temp = *xp; + *xp = *yp; + *yp = temp; +} + +void swapUS(unsigned short *xp, unsigned short *yp) +{ + unsigned short temp = *xp; + *xp = *yp; + *yp = temp; +} /**************************************************/ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM) @@ -51,8 +64,7 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u int counterG; long i, j, k; float *Eucl_Vec, h2; - h2 = h*h; - + h2 = h*h; /****************2D INPUT ***************/ if (dimZ == 0) { /* generate a 2D Gaussian kernel for NLM procedure */ @@ -67,14 +79,14 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u if (switchM == 1) { #pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j) for(i=0; i<(long)(dimX); i++) { - for(j=0; j<(long)(dimY); j++) { + for(j=0; j<(long)(dimY); j++) { Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2); }} } else { #pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j) for(i=0; i<(long)(dimX); i++) { - for(j=0; j<(long)(dimY); j++) { + for(j=0; j<(long)(dimY); j++) { Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2); }} } @@ -116,8 +128,8 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2) { long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG; - float *Weight_Vec, normsum, temp; - unsigned short *ind_i, *ind_j, temp_i, temp_j; + float *Weight_Vec, normsum; + unsigned short *ind_i, *ind_j; sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1); @@ -146,51 +158,43 @@ float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *W }} /* writing temporarily into vectors */ - if (normsum > EPS) { + if (normsum > EPS) { Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; - ind_j[counter] = j1; + ind_j[counter] = j1; counter++; } } - }} + }} /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ - for (x = 0; x < counter; x++) { - for (y = 0; y < counter; y++) { - if (Weight_Vec[y] < Weight_Vec[x]) { - temp = Weight_Vec[y+1]; - temp_i = ind_i[y+1]; - temp_j = ind_j[y+1]; - Weight_Vec[y+1] = Weight_Vec[y]; - Weight_Vec[y] = temp; - ind_i[y+1] = ind_i[y]; - ind_i[y] = temp_i; - ind_j[y+1] = ind_j[y]; - ind_j[y] = temp_j; - }}} - /*sorting loop finished*/ - - /*now select the NumNeighb more prominent weights and store into arrays */ + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ for(x=0; x < NumNeighb; x++) { - index = (dimX*dimY*x) + j*dimX+i; + index = (dimX*dimY*x) + j*dimX+i; H_i[index] = ind_i[x]; H_j[index] = ind_j[x]; Weights[index] = Weight_Vec[x]; - } - + } free(ind_i); free(ind_j); free(Weight_Vec); return 1; } - - float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2) { long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG; - float *Weight_Vec, normsum, temp; - unsigned short *ind_i, *ind_j, temp_i, temp_j; + float *Weight_Vec, normsum; + unsigned short *ind_i, *ind_j; sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1); @@ -228,32 +232,26 @@ float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float } } }} - /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ - for (x = 0; x < counter; x++) { - for (y = 0; y < counter; y++) { - if (Weight_Vec[y] < Weight_Vec[x]) { - temp = Weight_Vec[y+1]; - temp_i = ind_i[y+1]; - temp_j = ind_j[y+1]; - Weight_Vec[y+1] = Weight_Vec[y]; - Weight_Vec[y] = temp; - ind_i[y+1] = ind_i[y]; - ind_i[y] = temp_i; - ind_j[y+1] = ind_j[y]; - ind_j[y] = temp_j; - }}} + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into arrays */ + /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ for(x=0; x < NumNeighb; x++) { - //index = (dimX*dimY*x) + j*dimX+i; - index = (dimX*dimY*x) + i*dimY+j; + index = (dimX*dimY*x) + i*dimY+j; H_i[index] = ind_i[x]; H_j[index] = ind_j[x]; Weights[index] = Weight_Vec[x]; - } - + } free(ind_i); free(ind_j); free(Weight_Vec); diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/Core/regularisers_GPU/PatchSelect_GPU_core.cu new file mode 100644 index 0000000..f558b0f --- /dev/null +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -0,0 +1,468 @@ +/* + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC and Diamond Light Source Ltd. + * + * Copyright 2017 Daniil Kazantsev + * Copyright 2017 Srikanth Nagella, Edoardo Pasca + * Copyright 2018 Diamond Light Source Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PatchSelect_GPU_core.h" +
+/* CUDA implementation of non-local weight pre-calculation for non-local priors + * Weights and associated indices are stored into pre-allocated arrays and passed + * to the regulariser + * + * + * Input Parameters: + * 1. 2D grayscale image (classical 3D version will not be supported but rather 2D + dim extension (TODO)) + * 2. Searching window (half-size of the main bigger searching window, e.g. 11) + * 3. Similarity window (half-size of the patch window, e.g. 2) + * 4. The number of neighbours to take (the most prominent after sorting neighbours will be taken) + * 5. noise-related parameter to calculate non-local weights + * + * Output [2D]: + * 1. AR_i - indeces of i neighbours + * 2. AR_j - indeces of j neighbours + * 3. Weights_ij - associated weights + */
+
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
+
+inline void __checkCudaErrors(cudaError err, const char *file, const int line)
+{
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n",
+ file, line, (int)err, cudaGetErrorString(err));
+ exit(EXIT_FAILURE);
+ }
+}
+
+#define BLKXSIZE 16
+#define BLKYSIZE 16
+#define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) )
+#define M_PI 3.14159265358979323846
+#define EPS 1.0e-8
+#define CONSTVECSIZE5 121 +#define CONSTVECSIZE7 225 +#define CONSTVECSIZE9 361 +#define CONSTVECSIZE11 529 +#define CONSTVECSIZE13 729 +
+__device__ void swap(float *xp, float *yp) +{ + float temp = *xp; + *xp = *yp; + *yp = temp; +} +__device__ void swapUS(unsigned short *xp, unsigned short *yp) +{ + unsigned short temp = *xp; + *xp = *yp; + *yp = temp; +} + +/********************************************************************************/
+__global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+ + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + float normsum; + + float Weight_Vec[CONSTVECSIZE5]; + unsigned short ind_i[CONSTVECSIZE5]; + unsigned short ind_j[CONSTVECSIZE5]; +
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ long index = i*M+j; + + counter = 0; + for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { + for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { + i1 = i+i_m; + j1 = j+j_m; + if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + normsum = 0.0f; counterG = 0; + for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { + i2 = i1 + i_c; + j2 = j1 + j_c; + i3 = i + i_c; + j3 = j + j_c; + if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { + if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + counterG++; + }} + }} + /* writing temporarily into vectors */ + if (normsum > EPS) { + Weight_Vec[counter] = __expf(-normsum/h2); + ind_i[counter] = i1; + ind_j[counter] = j1; + counter++; + } + } + }} + + /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* and re-arrange indeces accordingly */ + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ + for(x=0; x < NumNeighb; x++) { + index2 = (N*M*x) + index; + H_i_d[index2] = ind_i[x]; + H_j_d[index2] = ind_j[x]; + Weights_d[index2] = Weight_Vec[x]; + } +}
+/********************************************************************************/
+__global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+ + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + float normsum; + + float Weight_Vec[CONSTVECSIZE7]; + unsigned short ind_i[CONSTVECSIZE7]; + unsigned short ind_j[CONSTVECSIZE7]; +
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ long index = i*M+j; + + counter = 0; + for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { + for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { + i1 = i+i_m; + j1 = j+j_m; + if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + normsum = 0.0f; counterG = 0; + for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { + i2 = i1 + i_c; + j2 = j1 + j_c; + i3 = i + i_c; + j3 = j + j_c; + if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { + if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + counterG++; + }} + }} + /* writing temporarily into vectors */ + if (normsum > EPS) { + Weight_Vec[counter] = __expf(-normsum/h2); + ind_i[counter] = i1; + ind_j[counter] = j1; + counter++; + } + } + }} + + /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* and re-arrange indeces accordingly */ + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ + for(x=0; x < NumNeighb; x++) { + index2 = (N*M*x) + index; + H_i_d[index2] = ind_i[x]; + H_j_d[index2] = ind_j[x]; + Weights_d[index2] = Weight_Vec[x]; + } +}
+__global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+ + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + float normsum; + + float Weight_Vec[CONSTVECSIZE9]; + unsigned short ind_i[CONSTVECSIZE9]; + unsigned short ind_j[CONSTVECSIZE9]; +
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ long index = i*M+j; + + counter = 0; + for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { + for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { + i1 = i+i_m; + j1 = j+j_m; + if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + normsum = 0.0f; counterG = 0; + for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { + i2 = i1 + i_c; + j2 = j1 + j_c; + i3 = i + i_c; + j3 = j + j_c; + if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { + if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + counterG++; + }} + }} + /* writing temporarily into vectors */ + if (normsum > EPS) { + Weight_Vec[counter] = expf(-normsum/h2); + ind_i[counter] = i1; + ind_j[counter] = j1; + counter++; + } + } + }} + + /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* and re-arrange indeces accordingly */ + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ + for(x=0; x < NumNeighb; x++) { + index2 = (N*M*x) + index; + H_i_d[index2] = ind_i[x]; + H_j_d[index2] = ind_j[x]; + Weights_d[index2] = Weight_Vec[x]; + } +} +__global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+ + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + float normsum; + + float Weight_Vec[CONSTVECSIZE11]; + unsigned short ind_i[CONSTVECSIZE11]; + unsigned short ind_j[CONSTVECSIZE11]; +
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ long index = i*M+j; + + counter = 0; + for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { + for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { + i1 = i+i_m; + j1 = j+j_m; + if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + normsum = 0.0f; counterG = 0; + for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { + i2 = i1 + i_c; + j2 = j1 + j_c; + i3 = i + i_c; + j3 = j + j_c; + if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { + if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + counterG++; + }} + }} + /* writing temporarily into vectors */ + if (normsum > EPS) { + Weight_Vec[counter] = __expf(-normsum/h2); + ind_i[counter] = i1; + ind_j[counter] = j1; + counter++; + } + } + }} + + /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* and re-arrange indeces accordingly */ + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ + for(x=0; x < NumNeighb; x++) { + index2 = (N*M*x) + index; + H_i_d[index2] = ind_i[x]; + H_j_d[index2] = ind_j[x]; + Weights_d[index2] = Weight_Vec[x]; + } +} +__global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2)
+{
+ + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + float normsum; + + float Weight_Vec[CONSTVECSIZE13]; + unsigned short ind_i[CONSTVECSIZE13]; + unsigned short ind_j[CONSTVECSIZE13]; +
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
+ int j = blockDim.y * blockIdx.y + threadIdx.y;
+
+ long index = i*M+j; + + counter = 0; + for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { + for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { + i1 = i+i_m; + j1 = j+j_m; + if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + normsum = 0.0f; counterG = 0; + for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { + i2 = i1 + i_c; + j2 = j1 + j_c; + i3 = i + i_c; + j3 = j + j_c; + if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { + if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + counterG++; + }} + }} + /* writing temporarily into vectors */ + if (normsum > EPS) { + Weight_Vec[counter] = __expf(-normsum/h2); + ind_i[counter] = i1; + ind_j[counter] = j1; + counter++; + } + } + }} + + /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* and re-arrange indeces accordingly */ + for (x = 0; x < counter-1; x++) { + for (y = 0; y < counter-x-1; y++) { + if (Weight_Vec[y] < Weight_Vec[y+1]) { + swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swapUS(&ind_i[y], &ind_i[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); + } + } + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ + for(x=0; x < NumNeighb; x++) { + index2 = (N*M*x) + index; + H_i_d[index2] = ind_i[x]; + H_j_d[index2] = ind_j[x]; + Weights_d[index2] = Weight_Vec[x]; + } +} + +
+/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +/********************* MAIN HOST FUNCTION ******************/ +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
+extern "C" void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h)
+{
+ int deviceCount = -1; // number of devices
+ cudaGetDeviceCount(&deviceCount);
+ if (deviceCount == 0) {
+ fprintf(stderr, "No CUDA devices found\n");
+ return;
+ } +
+ int SearchW_full, SimilW_full, counterG, i, j;
+ float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d; + unsigned short *H_i_d, *H_j_d;
+ h2 = h*h; +
+ dim3 dimBlock(BLKXSIZE,BLKYSIZE);
+ dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE));
+
+ SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */
+ SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */
+ + /* generate a 2D Gaussian kernel for NLM procedure */ + Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); + counterG = 0; + for(i=-SimilarWin; i<=SimilarWin; i++) { + for(j=-SimilarWin; j<=SimilarWin; j++) { + Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin)); + counterG++; + }} /*main neighb loop */ + + + /*allocate space on the device*/
+ checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) );
+ checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) ); + checkCudaErrors( cudaMalloc((void**)&H_j_d, N*M*NumNeighb*sizeof(unsigned short)) ); + checkCudaErrors( cudaMalloc((void**)&Weights_d, N*M*NumNeighb*sizeof(float)) ); + checkCudaErrors( cudaMalloc((void**)&Eucl_Vec_d, SimilW_full*sizeof(float)) );
+
+ /* copy data from the host to the device */
+ checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); + checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) );
+ + /********************** Run CUDA kernel here ********************/ + if (SearchWindow == 5) IndexSelect2D_5_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); + else if (SearchWindow == 7) IndexSelect2D_7_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); + else if (SearchWindow == 9) IndexSelect2D_9_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); + else if (SearchWindow == 11) IndexSelect2D_11_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); + else if (SearchWindow == 13) IndexSelect2D_13_kernel<<<dimGrid,dimBlock>>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); + else { + fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n");
+ return;} + checkCudaErrors(cudaPeekAtLastError() );
+ checkCudaErrors(cudaDeviceSynchronize());
+ /***************************************************************/
+
+ checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); + checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); + checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) ); +
+ cudaFree(Ad); + cudaFree(H_i_d); + cudaFree(H_j_d); + cudaFree(Weights_d); + cudaFree(Eucl_Vec_d);
+} diff --git a/Core/regularisers_GPU/PatchSelect_GPU_core.h b/Core/regularisers_GPU/PatchSelect_GPU_core.h new file mode 100644 index 0000000..d20fe9f --- /dev/null +++ b/Core/regularisers_GPU/PatchSelect_GPU_core.h @@ -0,0 +1,8 @@ +#ifndef __NLREG_KERNELS_H_ +#define __NLREG_KERNELS_H_ +#include "CCPiDefines.h" +#include <stdio.h> + +extern "C" CCPI_EXPORT void PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); + +#endif @@ -27,7 +27,7 @@ 5. Linear and nonlinear diffusion (explicit PDE minimisation scheme) **2D/3D CPU/GPU** (Ref. *8*) 6. Anisotropic Fourth-Order Diffusion (explicit PDE minimisation) **2D/3D CPU/GPU** (Ref. *9*) 7. A joint ROF-LLT (Lysaker-Lundervold-Tai) model for higher-order regularisation **2D/3D CPU/GPU** (Ref. *10,11*) -8. Nonlocal Total Variation regularisation (GS fixed point iteration) **2D/3D CPU/GPU** (Ref. *12*) +8. Nonlocal Total Variation regularisation (GS fixed point iteration) **2D CPU/GPU** (Ref. *12*) ### Multi-channel (denoising): 1. Fast-Gradient-Projection (FGP) Directional Total Variation **2D/3D CPU/GPU** (Ref. *3,4,2*) diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m index 54b8bac..3506cca 100644 --- a/Wrappers/Matlab/demos/demoMatlab_denoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m @@ -138,17 +138,17 @@ figure; imshow(u_diff4, [0 1]); title('Diffusion 4thO denoised image (CPU)'); fprintf('Weights pre-calculation for Non-local TV (takes time on CPU) \n'); SearchingWindow = 7; PatchWindow = 2; -NeighboursNumber = 15; % the number of neibours to include +NeighboursNumber = 20; % the number of neibours to include h = 0.23; % edge related parameter for NLM -[H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); +tic; [H_i, H_j, Weights] = PatchSelect(single(u0), SearchingWindow, PatchWindow, NeighboursNumber, h); toc; %% fprintf('Denoise using Non-local Total Variation (CPU) \n'); -iter_nltv = 2; % number of nltv iterations -lambda_nltv = 0.085; % regularisation parameter for nltv +iter_nltv = 3; % number of nltv iterations +lambda_nltv = 0.05; % regularisation parameter for nltv tic; u_nltv = Nonlocal_TV(single(u0), H_i, H_j, 0, Weights, lambda_nltv, iter_nltv); toc; rmse_nltv = (RMSE(u_nltv(:),Im(:))); fprintf('%s %f \n', 'RMSE error for Non-local Total Variation is:', rmse_nltv); -figure; imshow(u_nltv, [0 1]); title('Non-local Total Variation denoised image (CPU)'); +figure; imagesc(u_nltv, [0 1]); colormap(gray); daspect([1 1 1]); title('Non-local Total Variation denoised image (CPU)'); %% %>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< % diff --git a/Wrappers/Python/ccpi/filters/regularisers.py b/Wrappers/Python/ccpi/filters/regularisers.py index bf7e23c..0a65590 100644 --- a/Wrappers/Python/ccpi/filters/regularisers.py +++ b/Wrappers/Python/ccpi/filters/regularisers.py @@ -4,7 +4,7 @@ script which assigns a proper device core function based on a flag ('cpu' or 'gp from ccpi.filters.cpu_regularisers import TV_ROF_CPU, TV_FGP_CPU, TV_SB_CPU, dTV_FGP_CPU, TNV_CPU, NDF_CPU, Diff4th_CPU, TGV_CPU, LLT_ROF_CPU, PATCHSEL_CPU, NLTV_CPU try: - from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU + from ccpi.filters.gpu_regularisers import TV_ROF_GPU, TV_FGP_GPU, TV_SB_GPU, dTV_FGP_GPU, NDF_GPU, Diff4th_GPU, TGV_GPU, LLT_ROF_GPU, PATCHSEL_GPU gpu_enabled = True except ImportError: gpu_enabled = False @@ -153,7 +153,11 @@ def PatchSelect(inputData, searchwindow, patchwindow, neighbours, edge_parameter neighbours, edge_parameter) elif device == 'gpu' and gpu_enabled: - return 1 + return PATCHSEL_GPU(inputData, + searchwindow, + patchwindow, + neighbours, + edge_parameter) else: if not gpu_enabled and device == 'gpu': raise ValueError ('GPU is not available') diff --git a/Wrappers/Python/conda-recipe/run_test.py b/Wrappers/Python/conda-recipe/run_test.py index 6ffaca1..499ae7f 100755 --- a/Wrappers/Python/conda-recipe/run_test.py +++ b/Wrappers/Python/conda-recipe/run_test.py @@ -2,7 +2,7 @@ import unittest import numpy as np
import os
import timeit
-from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th
+from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th from PIL import Image
class TiffReader(object):
diff --git a/Wrappers/Python/demos/demo_cpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_regularisers.py index 31e4cad..78e9aff 100644 --- a/Wrappers/Python/demos/demo_cpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_regularisers.py @@ -400,20 +400,29 @@ plt.title('{}'.format('CPU results')) print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("___Nonlocal patches pre-calculation____") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +start_time = timeit.default_timer() # set parameters pars = {'algorithm' : PatchSelect, \ 'input' : u0,\ 'searchwindow': 7, \ 'patchwindow': 2,\ 'neighbours' : 15 ,\ - 'edge_parameter':0.23} + 'edge_parameter':0.18} H_i, H_j, Weights = PatchSelect(pars['input'], pars['searchwindow'], pars['patchwindow'], pars['neighbours'], pars['edge_parameter'],'cpu') - + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +""" +plt.figure() +plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1) +plt.show() +""" #%% print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("___Nonlocal Total Variation penalty____") @@ -431,10 +440,9 @@ pars2 = {'algorithm' : NLTV, \ 'H_j': H_j,\ 'H_k' : 0,\ 'Weights' : Weights,\ - 'regularisation_parameter': 0.085,\ - 'iterations': 2 + 'regularisation_parameter': 0.04,\ + 'iterations': 3 } -#%% start_time = timeit.default_timer() nltv_cpu = NLTV(pars2['input'], pars2['H_i'], diff --git a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py index 3d6e92f..616eab0 100644 --- a/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_cpu_vs_gpu_regularisers.py @@ -13,6 +13,7 @@ import numpy as np import os import timeit from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import PatchSelect from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -732,4 +733,58 @@ if (diff_im.sum() > 1): print ("Arrays do not match!") else: print ("Arrays match") +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("____Non-local regularisation bench_________") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") + +## plot +fig = plt.figure() +plt.suptitle('Comparison of Nonlocal TV regulariser using CPU and GPU implementations') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +pars = {'algorithm' : PatchSelect, \ + 'input' : u0,\ + 'searchwindow': 7, \ + 'patchwindow': 2,\ + 'neighbours' : 15 ,\ + 'edge_parameter':0.18} + +print ("############## Nonlocal Patches on CPU##################") +start_time = timeit.default_timer() +H_i, H_j, WeightsCPU = PatchSelect(pars['input'], + pars['searchwindow'], + pars['patchwindow'], + pars['neighbours'], + pars['edge_parameter'],'cpu') +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) + +print ("############## Nonlocal Patches on GPU##################") +start_time = timeit.default_timer() +start_time = timeit.default_timer() +H_i, H_j, WeightsGPU = PatchSelect(pars['input'], + pars['searchwindow'], + pars['patchwindow'], + pars['neighbours'], + pars['edge_parameter'],'gpu') +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) + +print ("--------Compare the results--------") +tolerance = 1e-05 +diff_im = np.zeros(np.shape(u0)) +diff_im = abs(WeightsCPU[0,:,:] - WeightsGPU[0,:,:]) +diff_im[diff_im > tolerance] = 1 +a=fig.add_subplot(1,2,2) +imgplot = plt.imshow(diff_im, vmin=0, vmax=1, cmap="gray") +plt.title('{}'.format('Pixels larger threshold difference')) +if (diff_im.sum() > 1): + print ("Arrays do not match!") +else: + print ("Arrays match") #%%
\ No newline at end of file diff --git a/Wrappers/Python/demos/demo_gpu_regularisers.py b/Wrappers/Python/demos/demo_gpu_regularisers.py index de0cbde..2ada559 100644 --- a/Wrappers/Python/demos/demo_gpu_regularisers.py +++ b/Wrappers/Python/demos/demo_gpu_regularisers.py @@ -13,6 +13,7 @@ import numpy as np import os import timeit from ccpi.filters.regularisers import ROF_TV, FGP_TV, SB_TV, TGV, LLT_ROF, FGP_dTV, NDF, DIFF4th +from ccpi.filters.regularisers import PatchSelect, NLTV from qualitymetrics import rmse ############################################################################### def printParametersToString(pars): @@ -84,7 +85,7 @@ pars = {'algorithm': ROF_TV, \ 'input' : u0,\ 'regularisation_parameter':0.04,\ 'number_of_iterations': 1200,\ - 'time_marching_parameter': 0.0025 + 'time_marching_parameter': 0.0025 } print ("##############ROF TV GPU##################") start_time = timeit.default_timer() @@ -394,6 +395,77 @@ plt.title('{}'.format('GPU results')) #%% print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("___Nonlocal patches pre-calculation____") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +start_time = timeit.default_timer() +# set parameters +pars = {'algorithm' : PatchSelect, \ + 'input' : u0,\ + 'searchwindow': 7, \ + 'patchwindow': 2,\ + 'neighbours' : 15 ,\ + 'edge_parameter':0.18} + +H_i, H_j, Weights = PatchSelect(pars['input'], + pars['searchwindow'], + pars['patchwindow'], + pars['neighbours'], + pars['edge_parameter'],'gpu') + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +""" +plt.figure() +plt.imshow(Weights[0,:,:],cmap="gray",interpolation="nearest",vmin=0, vmax=1) +plt.show() +""" +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +print ("___Nonlocal Total Variation penalty____") +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") +## plot +fig = plt.figure() +plt.suptitle('Performance of NLTV regulariser using the CPU') +a=fig.add_subplot(1,2,1) +a.set_title('Noisy Image') +imgplot = plt.imshow(u0,cmap="gray") + +pars2 = {'algorithm' : NLTV, \ + 'input' : u0,\ + 'H_i': H_i, \ + 'H_j': H_j,\ + 'H_k' : 0,\ + 'Weights' : Weights,\ + 'regularisation_parameter': 0.02,\ + 'iterations': 3 + } +start_time = timeit.default_timer() +nltv_cpu = NLTV(pars2['input'], + pars2['H_i'], + pars2['H_j'], + pars2['H_k'], + pars2['Weights'], + pars2['regularisation_parameter'], + pars2['iterations']) + +rms = rmse(Im, nltv_cpu) +pars['rmse'] = rms + +txtstr = printParametersToString(pars) +txtstr += "%s = %.3fs" % ('elapsed time',timeit.default_timer() - start_time) +print (txtstr) +a=fig.add_subplot(1,2,2) + +# these are matplotlib.patch.Patch properties +props = dict(boxstyle='round', facecolor='wheat', alpha=0.75) +# place a text box in upper left in axes coords +a.text(0.15, 0.25, txtstr, transform=a.transAxes, fontsize=14, + verticalalignment='top', bbox=props) +imgplot = plt.imshow(nltv_cpu, cmap="gray") +plt.title('{}'.format('CPU results')) +#%% +print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") print ("____________FGP-dTV bench___________________") print ("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%") diff --git a/Wrappers/Python/setup-regularisers.py.in b/Wrappers/Python/setup-regularisers.py.in index 542dcb4..462edda 100644 --- a/Wrappers/Python/setup-regularisers.py.in +++ b/Wrappers/Python/setup-regularisers.py.in @@ -45,6 +45,7 @@ extra_include_dirs += [os.path.join(".." , ".." , "Core"), os.path.join(".." , ".." , "Core", "regularisers_GPU" , "NDF" ) , os.path.join(".." , ".." , "Core", "regularisers_GPU" , "dTV_FGP" ) , os.path.join(".." , ".." , "Core", "regularisers_GPU" , "DIFF4th" ) , + os.path.join(".." , ".." , "Core", "regularisers_GPU" , "PatchSelect" ) , "."] if platform.system() == 'Windows': diff --git a/Wrappers/Python/src/cpu_regularisers.pyx b/Wrappers/Python/src/cpu_regularisers.pyx index e51e6d8..4aa3251 100644 --- a/Wrappers/Python/src/cpu_regularisers.pyx +++ b/Wrappers/Python/src/cpu_regularisers.pyx @@ -456,7 +456,7 @@ def PATCHSEL_CPU(inputData, searchwindow, patchwindow, neighbours, edge_paramete if inputData.ndim == 2: return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter) elif inputData.ndim == 3: - return PatchSel_3D(inputData, searchwindow, patchwindow, neighbours, edge_parameter) + return 1 def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, int searchwindow, int patchwindow, @@ -480,7 +480,7 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, # Run patch-based weight selection function PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow, neighbours, edge_parameter, 1) return H_i, H_j, Weights - +""" def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, int searchwindow, int patchwindow, @@ -507,7 +507,7 @@ def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, # Run patch-based weight selection function PatchSelect_CPU_main(&inputData[0,0,0], &H_i[0,0,0,0], &H_j[0,0,0,0], &H_k[0,0,0,0], &Weights[0,0,0,0], dims[2], dims[1], dims[0], searchwindow, patchwindow, neighbours, edge_parameter, 1) return H_i, H_j, H_k, Weights - +""" #****************************************************************# #***************Non-local Total Variation******************# diff --git a/Wrappers/Python/src/gpu_regularisers.pyx b/Wrappers/Python/src/gpu_regularisers.pyx index 82d3e01..302727e 100644 --- a/Wrappers/Python/src/gpu_regularisers.pyx +++ b/Wrappers/Python/src/gpu_regularisers.pyx @@ -26,6 +26,7 @@ cdef extern void LLT_ROF_GPU_main(float *Input, float *Output, float lambdaROF, cdef extern void NonlDiff_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int N, int M, int Z); cdef extern void dTV_FGP_GPU_main(float *Input, float *InputRef, float *Output, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int printM, int N, int M, int Z); cdef extern void Diffus4th_GPU_main(float *Input, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int N, int M, int Z); +cdef extern void PatchSelect_GPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, float *Weights, int N, int M, int SearchWindow, int SimilarWin, int NumNeighb, float h); # Total-variation Rudin-Osher-Fatemi (ROF) def TV_ROF_GPU(inputData, @@ -542,3 +543,34 @@ def Diff4th_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, Diffus4th_GPU_main(&inputData[0,0,0], &outputData[0,0,0], regularisation_parameter, edge_parameter, iterationsNumb, time_marching_parameter, dims[2], dims[1], dims[0]) return outputData +#****************************************************************# +#************Patch-based weights pre-selection******************# +#****************************************************************# +def PATCHSEL_GPU(inputData, searchwindow, patchwindow, neighbours, edge_parameter): + if inputData.ndim == 2: + return PatchSel_2D(inputData, searchwindow, patchwindow, neighbours, edge_parameter) + elif inputData.ndim == 3: + return 1 +def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, + int searchwindow, + int patchwindow, + int neighbours, + float edge_parameter): + cdef long dims[3] + dims[0] = neighbours + dims[1] = inputData.shape[0] + dims[2] = inputData.shape[1] + + cdef np.ndarray[np.float32_t, ndim=3, mode="c"] Weights = \ + np.zeros([dims[0], dims[1],dims[2]], dtype='float32') + + cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_i = \ + np.zeros([dims[0], dims[1],dims[2]], dtype='uint16') + + cdef np.ndarray[np.uint16_t, ndim=3, mode="c"] H_j = \ + np.zeros([dims[0], dims[1],dims[2]], dtype='uint16') + + # Run patch-based weight selection function + PatchSelect_GPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], searchwindow, patchwindow, neighbours, edge_parameter) + + return H_i, H_j, Weights |