From d2d4b4bd1db461fb9215abd20f829ce0298f3876 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Tue, 3 Sep 2019 17:02:54 +0100 Subject: CPU fixed, GPU almost --- src/Core/regularisers_CPU/Nonlocal_TV_core.c | 73 ++++--- src/Core/regularisers_CPU/Nonlocal_TV_core.h | 18 +- src/Core/regularisers_CPU/PatchSelect_core.c | 164 ++++---------- src/Core/regularisers_CPU/PatchSelect_core.h | 7 +- src/Core/regularisers_GPU/PatchSelect_GPU_core.cu | 236 +++++++++++---------- .../mex_compile/regularisers_CPU/Nonlocal_TV.c | 30 +-- .../mex_compile/regularisers_CPU/PatchSelect.c | 24 +-- src/Python/src/cpu_regularisers.pyx | 8 +- 8 files changed, 243 insertions(+), 317 deletions(-) (limited to 'src') diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.c b/src/Core/regularisers_CPU/Nonlocal_TV_core.c index c4c9118..de1c173 100644 --- a/src/Core/regularisers_CPU/Nonlocal_TV_core.c +++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.c @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,50 +31,55 @@ * 2. AR_i - indeces of i neighbours * 3. AR_j - indeces of j neighbours * 4. AR_k - indeces of k neighbours (0 - for 2D case) - * 5. Weights_ij(k) - associated weights + * 5. Weights_ij(k) - associated weights * 6. regularisation parameter - * 7. iterations number - + * 7. iterations number + * Output: - * 1. denoised image/volume + * 1. denoised image/volume * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060. - + */ /*****************************************************************************/ -float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb) +float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM) { long i, j, k; int iter; lambdaReg = 1.0f/lambdaReg; - + /*****2D INPUT *****/ if (dimZ == 0) { copyIm(A_orig, Output, (long)(dimX), (long)(dimY), 1l); /* for each pixel store indeces of the most similar neighbours (patches) */ - for(iter=0; iter>>>**********/ float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg) { - long x, i1, j1, index, index_m; + long x, i1, j1, index, index_m; float value = 0.0f, normweight = 0.0f; - + index_m = j*dimX+i; for(x=0; x < NumNeighb; x++) { index = (dimX*dimY*x) + j*dimX+i; @@ -99,9 +104,9 @@ float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_ /*3D version*/ float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg) { - long x, i1, j1, k1, index; + long x, i1, j1, k1, index; float value = 0.0f, normweight = 0.0f; - + for(x=0; x < NumNeighb; x++) { index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i; i1 = H_i[index]; @@ -109,7 +114,7 @@ float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_ k1 = H_k[index]; value += A[(dimX*dimY*k1) + j1*dimX+i1]*Weights[index]; normweight += Weights[index]; - } + } A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight); return *A; } @@ -118,37 +123,37 @@ float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_ /***********<<<
>>>**********/ float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg) { - long x, i1, j1, index, index_m; + long x, i1, j1, index, index_m; float value = 0.0f, normweight = 0.0f, NLgrad_magn = 0.0f, NLCoeff; - + index_m = j*dimX+i; - + for(x=0; x < NumNeighb; x++) { index = (dimX*dimY*x) + j*dimX+i; /*c*/ i1 = H_i[index]; j1 = H_j[index]; NLgrad_magn += powf((A[j1*dimX+i1] - A[index_m]),2)*Weights[index]; } - + NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */ NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS)); - + for(x=0; x < NumNeighb; x++) { index = (dimX*dimY*x) + j*dimX+i; /*c*/ i1 = H_i[index]; j1 = H_j[index]; value += A[j1*dimX+i1]*NLCoeff*Weights[index]; normweight += Weights[index]*NLCoeff; - } + } A[index_m] = (lambdaReg*A_orig[index_m] + value)/(lambdaReg + normweight); return *A; } /*3D version*/ float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg) { - long x, i1, j1, k1, index; + long x, i1, j1, k1, index; float value = 0.0f, normweight = 0.0f, NLgrad_magn = 0.0f, NLCoeff; - + for(x=0; x < NumNeighb; x++) { index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i; i1 = H_i[index]; @@ -156,10 +161,10 @@ float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_ k1 = H_k[index]; NLgrad_magn += powf((A[(dimX*dimY*k1) + j1*dimX+i1] - A[(dimX*dimY*k1) + j*dimX+i]),2)*Weights[index]; } - + NLgrad_magn = sqrtf(NLgrad_magn); /*Non Local Gradients Magnitude */ NLCoeff = 2.0f*(1.0f/(NLgrad_magn + EPS)); - + for(x=0; x < NumNeighb; x++) { index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i; i1 = H_i[index]; @@ -167,7 +172,7 @@ float NLM_TV_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_ k1 = H_k[index]; value += A[(dimX*dimY*k1) + j1*dimX+i1]*NLCoeff*Weights[index]; normweight += Weights[index]*NLCoeff; - } + } A[(dimX*dimY*k) + j*dimX+i] = (lambdaReg*A_orig[(dimX*dimY*k) + j*dimX+i] + value)/(lambdaReg + normweight); return *A; } diff --git a/src/Core/regularisers_CPU/Nonlocal_TV_core.h b/src/Core/regularisers_CPU/Nonlocal_TV_core.h index 6d55101..18f6724 100644 --- a/src/Core/regularisers_CPU/Nonlocal_TV_core.h +++ b/src/Core/regularisers_CPU/Nonlocal_TV_core.h @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,19 +39,19 @@ * 2. AR_i - indeces of i neighbours * 3. AR_j - indeces of j neighbours * 4. AR_k - indeces of k neighbours (0 - for 2D case) - * 5. Weights_ij(k) - associated weights + * 5. Weights_ij(k) - associated weights * 6. regularisation parameter - * 7. iterations number - + * 7. iterations number + * Output: - * 1. denoised image/volume - * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060. + * 1. denoised image/volume + * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060. */ - + #ifdef __cplusplus extern "C" { #endif -CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb); +CCPI_EXPORT float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM); CCPI_EXPORT float NLM_H1_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg); CCPI_EXPORT float NLM_TV_2D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, int NumNeighb, float lambdaReg); CCPI_EXPORT float NLM_H1_3D(float *A, float *A_orig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimX, long dimY, long dimZ, int NumNeighb, float lambdaReg); diff --git a/src/Core/regularisers_CPU/PatchSelect_core.c b/src/Core/regularisers_CPU/PatchSelect_core.c index cf5cdc7..8581797 100644 --- a/src/Core/regularisers_CPU/PatchSelect_core.c +++ b/src/Core/regularisers_CPU/PatchSelect_core.c @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,27 +44,27 @@ * 4. Weights_ijk - associated weights */ -void swap(float *xp, float *yp) -{ - float temp = *xp; - *xp = *yp; - *yp = temp; -} +void swap(float *xp, float *yp) +{ + float temp = *xp; + *xp = *yp; + *yp = temp; +} -void swapUS(unsigned short *xp, unsigned short *yp) -{ - unsigned short temp = *xp; - *xp = *yp; - *yp = temp; -} +void swapUS(unsigned short *xp, unsigned short *yp) +{ + unsigned short temp = *xp; + *xp = *yp; + *yp = temp; +} /**************************************************/ -float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM) +float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h) { int counterG; long i, j, k; float *Eucl_Vec, h2; - h2 = h*h; + h2 = h*h; /****************2D INPUT ***************/ if (dimZ == 0) { /* generate a 2D Gaussian kernel for NLM procedure */ @@ -76,23 +76,14 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u counterG++; }} /*main neighb loop */ /* for each pixel store indeces of the most similar neighbours (patches) */ - if (switchM == 1) { -#pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j) - for(i=0; i<(long)(dimX); i++) { - for(j=0; j<(long)(dimY); j++) { - Indeces2D_p(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2); - }} - } - else { #pragma omp parallel for shared (A, Weights, H_i, H_j) private(i,j) for(i=0; i<(long)(dimX); i++) { for(j=0; j<(long)(dimY); j++) { Indeces2D(A, H_i, H_j, Weights, i, j, (long)(dimX), (long)(dimY), Eucl_Vec, NumNeighb, SearchWindow, SimilarWin, h2); }} - } } else { - /****************3D INPUT ***************/ + /****************3D INPUT ***************/ /* generate a 3D Gaussian kernel for NLM procedure */ Eucl_Vec = (float*) calloc ((2*SimilarWin+1)*(2*SimilarWin+1)*(2*SimilarWin+1),sizeof(float)); counterG = 0; @@ -101,25 +92,15 @@ float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, u for(k=-SimilarWin; k<=SimilarWin; k++) { Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2) + pow(((float) k), 2))/(2*SimilarWin*SimilarWin*SimilarWin)); counterG++; - }}} /*main neighb loop */ - + }}} /*main neighb loop */ + /* for each voxel store indeces of the most similar neighbours (patches) */ - if (switchM == 1) { #pragma omp parallel for shared (A, Weights, H_i, H_j, H_k) private(i,j,k) for(i=0; i= 0) && (i1 < dimX)) && ((j1 >= 0) && (j1 < dimY))) { - normsum = 0.0f; counterG = 0; - for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { - for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; - j2 = j1 + j_c; - i3 = i + i_c; - j3 = j + j_c; - if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) { - if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) { - normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2); - counterG++; - }} - - }} - /* writing temporarily into vectors */ - if (normsum > EPS) { - Weight_Vec[counter] = expf(-normsum/h2); - ind_i[counter] = i1; - ind_j[counter] = j1; - counter++; - } - } - }} - /* do sorting to choose the most prominent weights [HIGH to LOW] */ - /* and re-arrange indeces accordingly */ - for (x = 0; x < counter-1; x++) { - for (y = 0; y < counter-x-1; y++) { - if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); - swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); - } - } - } - /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ - for(x=0; x < NumNeighb; x++) { - index = (dimX*dimY*x) + j*dimX+i; - H_i[index] = ind_i[x]; - H_j[index] = ind_j[x]; - Weights[index] = Weight_Vec[x]; - } - free(ind_i); - free(ind_j); - free(Weight_Vec); - return 1; -} -float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2) -{ - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, index, sizeWin_tot, counterG; - float *Weight_Vec, normsum; - unsigned short *ind_i, *ind_j; - + sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1); - + Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float)); ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short)); ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short)); - + counter = 0; for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { @@ -217,11 +133,9 @@ float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float j3 = j + j_c; if (((i2 >= 0) && (i2 < dimX)) && ((j2 >= 0) && (j2 < dimY))) { if (((i3 >= 0) && (i3 < dimX)) && ((j3 >= 0) && (j3 < dimY))) { - //normsum += Eucl_Vec[counterG]*pow(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2); - normsum += Eucl_Vec[counterG]*pow(Aorig[i3*dimY + (j3)] - Aorig[i2*dimY + (j2)], 2); + normsum += Eucl_Vec[counterG]*powf(Aorig[j3*dimX + (i3)] - Aorig[j2*dimX + (i2)], 2); counterG++; }} - }} /* writing temporarily into vectors */ if (normsum > EPS) { @@ -232,26 +146,25 @@ float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float } } }} - /* do sorting to choose the most prominent weights [HIGH to LOW] */ + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ for (x = 0; x < counter-1; x++) { for (y = 0; y < counter-x-1; y++) { if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swap(&Weight_Vec[y], &Weight_Vec[y+1]); swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); } } } - /*sorting loop finished*/ - - /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into pre-allocated arrays */ for(x=0; x < NumNeighb; x++) { - index = (dimX*dimY*x) + i*dimY+j; + index = (dimX*dimY*x) + j*dimX+i; H_i[index] = ind_i[x]; H_j[index] = ind_j[x]; Weights[index] = Weight_Vec[x]; - } + } free(ind_i); free(ind_j); free(Weight_Vec); @@ -263,14 +176,14 @@ float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned long i1, j1, k1, i_m, j_m, k_m, i_c, j_c, k_c, i2, j2, k2, i3, j3, k3, counter, x, y, index, sizeWin_tot, counterG; float *Weight_Vec, normsum, temp; unsigned short *ind_i, *ind_j, *ind_k, temp_i, temp_j, temp_k; - + sizeWin_tot = (2*SearchWindow + 1)*(2*SearchWindow + 1)*(2*SearchWindow + 1); - + Weight_Vec = (float*) calloc(sizeWin_tot, sizeof(float)); ind_i = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short)); ind_j = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short)); ind_k = (unsigned short*) calloc(sizeWin_tot, sizeof(unsigned short)); - + counter = 0l; for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { @@ -324,22 +237,21 @@ float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned ind_k[y] = temp_k; }}} /*sorting loop finished*/ - + /*now select the NumNeighb more prominent weights and store into arrays */ for(x=0; x < NumNeighb; x++) { index = dimX*dimY*dimZ*x + (dimX*dimY*k) + j*dimX+i; - + H_i[index] = ind_i[x]; H_j[index] = ind_j[x]; H_k[index] = ind_k[x]; - + Weights[index] = Weight_Vec[x]; } - + free(ind_i); free(ind_j); free(ind_k); free(Weight_Vec); return 1; } - diff --git a/src/Core/regularisers_CPU/PatchSelect_core.h b/src/Core/regularisers_CPU/PatchSelect_core.h index ddaa428..ea18c26 100644 --- a/src/Core/regularisers_CPU/PatchSelect_core.h +++ b/src/Core/regularisers_CPU/PatchSelect_core.h @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,9 +54,8 @@ #ifdef __cplusplus extern "C" { #endif -CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM); +CCPI_EXPORT float PatchSelect_CPU_main(float *A, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h); CCPI_EXPORT float Indeces2D(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2); -CCPI_EXPORT float Indeces2D_p(float *Aorig, unsigned short *H_i, unsigned short *H_j, float *Weights, long i, long j, long dimX, long dimY, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2); CCPI_EXPORT float Indeces3D(float *Aorig, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, long i, long j, long k, long dimY, long dimX, long dimZ, float *Eucl_Vec, int NumNeighb, int SearchWindow, int SimilarWin, float h2); #ifdef __cplusplus } diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu index 98c8488..2cd27ff 100644 --- a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -51,35 +51,35 @@ #define CONSTVECSIZE11 529 #define CONSTVECSIZE13 729 -__device__ void swap(float *xp, float *yp) +__device__ void swap(float *xp, float *yp) { - float temp = *xp; - *xp = *yp; - *yp = temp; + float temp = *xp; + *xp = *yp; + *yp = temp; } -__device__ void swapUS(unsigned short *xp, unsigned short *yp) -{ - unsigned short temp = *xp; - *xp = *yp; - *yp = temp; +__device__ void swapUS(unsigned short *xp, unsigned short *yp) +{ + unsigned short temp = *xp; + *xp = *yp; + *yp = temp; } /********************************************************************************/ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; - + float Weight_Vec[CONSTVECSIZE5]; unsigned short ind_i[CONSTVECSIZE5]; unsigned short ind_j[CONSTVECSIZE5]; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; - - long index = i*M+j; - + + long index = i*M+j; + counter = 0; for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { @@ -95,9 +95,9 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne j3 = j + j_c; if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); counterG++; - }} + }} }} /* writing temporarily into vectors */ if (normsum > EPS) { @@ -108,43 +108,43 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne } } }} - + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ for (x = 0; x < counter-1; x++) { for (y = 0; y < counter-x-1; y++) { if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swap(&Weight_Vec[y], &Weight_Vec[y+1]); swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); } } - } - /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into arrays */ + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ for(x=0; x < NumNeighb; x++) { index2 = (N*M*x) + index; H_i_d[index2] = ind_i[x]; H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -} +} /********************************************************************************/ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; - + float Weight_Vec[CONSTVECSIZE7]; unsigned short ind_i[CONSTVECSIZE7]; unsigned short ind_j[CONSTVECSIZE7]; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; - - long index = i*M+j; - + + long index = i*M+j; + counter = 0; for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { @@ -160,9 +160,9 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne j3 = j + j_c; if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); counterG++; - }} + }} }} /* writing temporarily into vectors */ if (normsum > EPS) { @@ -173,20 +173,20 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne } } }} - + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ for (x = 0; x < counter-1; x++) { for (y = 0; y < counter-x-1; y++) { if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swap(&Weight_Vec[y], &Weight_Vec[y+1]); swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); } } - } - /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into arrays */ + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ for(x=0; x < NumNeighb; x++) { index2 = (N*M*x) + index; H_i_d[index2] = ind_i[x]; @@ -195,39 +195,47 @@ __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigne } } __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +{ - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; - + float Weight_Vec[CONSTVECSIZE9]; unsigned short ind_i[CONSTVECSIZE9]; unsigned short ind_j[CONSTVECSIZE9]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + //if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + //if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + // } /*if j2 j3*/ + } + // } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { Weight_Vec[counter] = expf(-normsum/h2); @@ -235,44 +243,46 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne ind_j[counter] = j1; counter++; } + } /*if j1*/ } - }} - + } /*if i1*/ + } + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ for (x = 0; x < counter-1; x++) { for (y = 0; y < counter-x-1; y++) { if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swap(&Weight_Vec[y], &Weight_Vec[y+1]); swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); } } - } - /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into arrays */ + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ for(x=0; x < NumNeighb; x++) { index2 = (N*M*x) + index; H_i_d[index2] = ind_i[x]; H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; - } + } } __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; - + float Weight_Vec[CONSTVECSIZE11]; unsigned short ind_i[CONSTVECSIZE11]; unsigned short ind_j[CONSTVECSIZE11]; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; - - long index = i*M+j; - + + long index = i*M+j; + counter = 0; for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { @@ -288,9 +298,9 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign j3 = j + j_c; if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); counterG++; - }} + }} }} /* writing temporarily into vectors */ if (normsum > EPS) { @@ -301,42 +311,42 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign } } }} - + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ for (x = 0; x < counter-1; x++) { for (y = 0; y < counter-x-1; y++) { if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swap(&Weight_Vec[y], &Weight_Vec[y+1]); swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); } } - } - /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into arrays */ + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ for(x=0; x < NumNeighb; x++) { index2 = (N*M*x) + index; H_i_d[index2] = ind_i[x]; H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -} +} __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) -{ +{ long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; float normsum; - + float Weight_Vec[CONSTVECSIZE13]; unsigned short ind_i[CONSTVECSIZE13]; unsigned short ind_j[CONSTVECSIZE13]; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; - - long index = i*M+j; - + + long index = i*M+j; + counter = 0; for(i_m=-SearchWindow; i_m<=SearchWindow; i_m++) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { @@ -352,9 +362,9 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign j3 = j + j_c; if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); counterG++; - }} + }} }} /* writing temporarily into vectors */ if (normsum > EPS) { @@ -365,29 +375,29 @@ __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsign } } }} - + /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ for (x = 0; x < counter-1; x++) { for (y = 0; y < counter-x-1; y++) { if (Weight_Vec[y] < Weight_Vec[y+1]) { - swap(&Weight_Vec[y], &Weight_Vec[y+1]); + swap(&Weight_Vec[y], &Weight_Vec[y+1]); swapUS(&ind_i[y], &ind_i[y+1]); - swapUS(&ind_j[y], &ind_j[y+1]); + swapUS(&ind_j[y], &ind_j[y+1]); } } - } - /*sorting loop finished*/ - /*now select the NumNeighb more prominent weights and store into arrays */ + } + /*sorting loop finished*/ + /*now select the NumNeighb more prominent weights and store into arrays */ for(x=0; x < NumNeighb; x++) { index2 = (N*M*x) + index; H_i_d[index2] = ind_i[x]; H_j_d[index2] = ind_j[x]; Weights_d[index2] = Weight_Vec[x]; } -} +} + - /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ /********************* MAIN HOST FUNCTION ******************/ /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ @@ -398,19 +408,19 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor if (deviceCount == 0) { fprintf(stderr, "No CUDA devices found\n"); return -1; - } - + } + int SearchW_full, SimilW_full, counterG, i, j; - float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d; + float *Ad, *Weights_d, h2, *Eucl_Vec, *Eucl_Vec_d; unsigned short *H_i_d, *H_j_d; h2 = h*h; - + dim3 dimBlock(BLKXSIZE,BLKYSIZE); - dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); - + dim3 dimGrid(idivup(N,BLKXSIZE), idivup(M,BLKYSIZE)); + SearchW_full = (2*SearchWindow + 1)*(2*SearchWindow + 1); /* the full searching window size */ SimilW_full = (2*SimilarWin + 1)*(2*SimilarWin + 1); /* the full similarity window size */ - + /* generate a 2D Gaussian kernel for NLM procedure */ Eucl_Vec = (float*) calloc (SimilW_full,sizeof(float)); counterG = 0; @@ -419,8 +429,8 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor Eucl_Vec[counterG] = (float)exp(-(pow(((float) i), 2) + pow(((float) j), 2))/(2.0*SimilarWin*SimilarWin)); counterG++; }} /*main neighb loop */ - - + + /*allocate space on the device*/ checkCudaErrors( cudaMalloc((void**)&Ad, N*M*sizeof(float)) ); checkCudaErrors( cudaMalloc((void**)&H_i_d, N*M*NumNeighb*sizeof(unsigned short)) ); @@ -430,8 +440,8 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor /* copy data from the host to the device */ checkCudaErrors( cudaMemcpy(Ad,A,N*M*sizeof(float),cudaMemcpyHostToDevice) ); - checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); - + checkCudaErrors( cudaMemcpy(Eucl_Vec_d,Eucl_Vec,SimilW_full*sizeof(float),cudaMemcpyHostToDevice) ); + /********************** Run CUDA kernel here ********************/ if (SearchWindow == 5) IndexSelect2D_5_kernel<<>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else if (SearchWindow == 7) IndexSelect2D_7_kernel<<>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); @@ -440,19 +450,19 @@ extern "C" int PatchSelect_GPU_main(float *A, unsigned short *H_i, unsigned shor else if (SearchWindow == 13) IndexSelect2D_13_kernel<<>>(Ad, H_i_d, H_j_d, Weights_d, Eucl_Vec_d, N, M, SearchWindow, SearchW_full, SimilarWin, NumNeighb, h2); else { fprintf(stderr, "Select the searching window size from 5, 7, 9, 11 or 13\n"); - return -1;} - checkCudaErrors(cudaPeekAtLastError() ); - checkCudaErrors(cudaDeviceSynchronize()); - /***************************************************************/ - + return -1;} + checkCudaErrors(cudaPeekAtLastError() ); + checkCudaErrors(cudaDeviceSynchronize()); + /***************************************************************/ + checkCudaErrors(cudaMemcpy(H_i, H_i_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); - checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); - checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) ); - - - cudaFree(Ad); - cudaFree(H_i_d); - cudaFree(H_j_d); + checkCudaErrors(cudaMemcpy(H_j, H_j_d, N*M*NumNeighb*sizeof(unsigned short),cudaMemcpyDeviceToHost) ); + checkCudaErrors(cudaMemcpy(Weights, Weights_d, N*M*NumNeighb*sizeof(float),cudaMemcpyDeviceToHost) ); + + + cudaFree(Ad); + cudaFree(H_i_d); + cudaFree(H_j_d); cudaFree(Weights_d); cudaFree(Eucl_Vec_d); cudaDeviceReset(); diff --git a/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c index 34b9915..a5fb1df 100644 --- a/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c +++ b/src/Matlab/mex_compile/regularisers_CPU/Nonlocal_TV.c @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,12 +35,12 @@ * 2. AR_i - indeces of i neighbours * 3. AR_j - indeces of j neighbours * 4. AR_k - indeces of k neighbours (0 - for 2D case) - * 5. Weights_ij(k) - associated weights + * 5. Weights_ij(k) - associated weights * 6. regularisation parameter - * 7. iterations number - + * 7. iterations number + * Output: - * 1. denoised image/volume + * 1. denoised image/volume * Elmoataz, Abderrahim, Olivier Lezoray, and Sébastien Bougleux. "Nonlocal discrete regularization on weighted graphs: a framework for image and manifold processing." IEEE Trans. Image Processing 17, no. 7 (2008): 1047-1060. */ @@ -54,11 +54,11 @@ void mexFunction( const mwSize *dim_array; const mwSize *dim_array2; float *A_orig, *Output=NULL, *Weights, lambda; - + dim_array = mxGetDimensions(prhs[0]); dim_array2 = mxGetDimensions(prhs[1]); number_of_dims = mxGetNumberOfDimensions(prhs[0]); - + /*Handling Matlab input data*/ A_orig = (float *) mxGetData(prhs[0]); /* a 2D image or a set of 2D images (3D stack) */ H_i = (unsigned short *) mxGetData(prhs[1]); /* indeces of i neighbours */ @@ -67,14 +67,14 @@ void mexFunction( Weights = (float *) mxGetData(prhs[4]); /* weights for patches */ lambda = (float) mxGetScalar(prhs[5]); /* regularisation parameter */ IterNumb = (int) mxGetScalar(prhs[6]); /* the number of iterations */ - - dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; - + + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; + /*****2D INPUT *****/ if (number_of_dims == 2) { - dimZ = 0; + dimZ = 0; NumNeighb = dim_array2[2]; - Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); } /*****3D INPUT *****/ /****************************************************/ @@ -82,7 +82,7 @@ void mexFunction( NumNeighb = dim_array2[3]; Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); } - + /* run the main function here */ - Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb); + Nonlocal_TV_CPU_main(A_orig, Output, H_i, H_j, H_k, Weights, dimX, dimY, dimZ, NumNeighb, lambda, IterNumb, 0); } diff --git a/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c index 1acab29..84b08dd 100644 --- a/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c +++ b/src/Matlab/mex_compile/regularisers_CPU/PatchSelect.c @@ -1,11 +1,11 @@ /* * This work is part of the Core Imaging Library developed by * Visual Analytics and Imaging System Group of the Science Technology - * Facilities Council, STFC and Diamond Light Source Ltd. + * Facilities Council, STFC and Diamond Light Source Ltd. * * Copyright 2017 Daniil Kazantsev * Copyright 2017 Srikanth Nagella, Edoardo Pasca - * Copyright 2018 Diamond Light Source Ltd. + * Copyright 2018 Diamond Light Source Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,14 +53,14 @@ void mexFunction( int number_of_dims, SearchWindow, SimilarWin, NumNeighb; mwSize dimX, dimY, dimZ; const mwSize *dim_array; - unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL; + unsigned short *H_i=NULL, *H_j=NULL, *H_k=NULL; float *A, *Weights = NULL, h; mwSize dim_array2[3]; /* for 2D data */ mwSize dim_array3[4]; /* for 3D data */ - + dim_array = mxGetDimensions(prhs[0]); number_of_dims = mxGetNumberOfDimensions(prhs[0]); - + /*Handling Matlab input data*/ A = (float *) mxGetData(prhs[0]); /* a 2D or 3D image/volume */ SearchWindow = (int) mxGetScalar(prhs[1]); /* Large Searching window */ @@ -71,22 +71,22 @@ void mexFunction( dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; dim_array2[0] = dimX; dim_array2[1] = dimY; dim_array2[2] = NumNeighb; /* 2D case */ dim_array3[0] = dimX; dim_array3[1] = dimY; dim_array3[2] = dimZ; dim_array3[3] = NumNeighb; /* 3D case */ - + /****************2D INPUT ***************/ if (number_of_dims == 2) { - dimZ = 0; + dimZ = 0; H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL)); H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(3, dim_array2, mxUINT16_CLASS, mxREAL)); Weights = (float*)mxGetPr(plhs[2] = mxCreateNumericArray(3, dim_array2, mxSINGLE_CLASS, mxREAL)); } /****************3D INPUT ***************/ - if (number_of_dims == 3) { + if (number_of_dims == 3) { H_i = (unsigned short*)mxGetPr(plhs[0] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL)); H_j = (unsigned short*)mxGetPr(plhs[1] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL)); H_k = (unsigned short*)mxGetPr(plhs[2] = mxCreateNumericArray(4, dim_array3, mxUINT16_CLASS, mxREAL)); - Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL)); + Weights = (float*)mxGetPr(plhs[3] = mxCreateNumericArray(4, dim_array3, mxSINGLE_CLASS, mxREAL)); } - - PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h, 0); - + + PatchSelect_CPU_main(A, H_i, H_j, H_k, Weights, (long)(dimX), (long)(dimY), (long)(dimZ), SearchWindow, SimilarWin, NumNeighb, h); + } diff --git a/src/Python/src/cpu_regularisers.pyx b/src/Python/src/cpu_regularisers.pyx index 904b4f5..4917d06 100644 --- a/src/Python/src/cpu_regularisers.pyx +++ b/src/Python/src/cpu_regularisers.pyx @@ -27,8 +27,8 @@ cdef extern float Diffusion_CPU_main(float *Input, float *Output, float *infovec cdef extern float Diffus4th_CPU_main(float *Input, float *Output, float *infovector, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, float epsil, int dimX, int dimY, int dimZ); cdef extern float dTV_FGP_CPU_main(float *Input, float *InputRef, float *Output, float *infovector, float lambdaPar, int iterationsNumb, float epsil, float eta, int methodTV, int nonneg, int dimX, int dimY, int dimZ); cdef extern float TNV_CPU_main(float *Input, float *u, float lambdaPar, int maxIter, float tol, int dimX, int dimY, int dimZ); -cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h, int switchM); -cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb); +cdef extern float PatchSelect_CPU_main(float *Input, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int SearchWindow, int SimilarWin, int NumNeighb, float h); +cdef extern float Nonlocal_TV_CPU_main(float *A_orig, float *Output, unsigned short *H_i, unsigned short *H_j, unsigned short *H_k, float *Weights, int dimX, int dimY, int dimZ, int NumNeighb, float lambdaReg, int IterNumb, int switchM); cdef extern float Diffusion_Inpaint_CPU_main(float *Input, unsigned char *Mask, float *Output, float lambdaPar, float sigmaPar, int iterationsNumb, float tau, int penaltytype, int dimX, int dimY, int dimZ); cdef extern float NonlocalMarching_Inpaint_main(float *Input, unsigned char *M, float *Output, unsigned char *M_upd, int SW_increment, int iterationsNumb, int trigger, int dimX, int dimY, int dimZ); @@ -570,7 +570,7 @@ def PatchSel_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0], dims[1],dims[2]], dtype='uint16') # Run patch-based weight selection function - PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow, neighbours, edge_parameter, 1) + PatchSelect_CPU_main(&inputData[0,0], &H_j[0,0,0], &H_i[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[2], dims[1], 0, searchwindow, patchwindow, neighbours, edge_parameter) return H_i, H_j, Weights """ def PatchSel_3D(np.ndarray[np.float32_t, ndim=3, mode="c"] inputData, @@ -625,7 +625,7 @@ def NLTV_2D(np.ndarray[np.float32_t, ndim=2, mode="c"] inputData, np.zeros([dims[0],dims[1]], dtype='float32') # Run nonlocal TV regularisation - Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0], &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[1], dims[0], 0, neighbours, regularisation_parameter, iterations) + Nonlocal_TV_CPU_main(&inputData[0,0], &outputData[0,0], &H_i[0,0,0], &H_j[0,0,0], &H_i[0,0,0], &Weights[0,0,0], dims[1], dims[0], 0, neighbours, regularisation_parameter, iterations, 1) return outputData #*********************Inpainting WITH****************************# -- cgit v1.2.3 From e2601be8f44c09ff21f259d0ce3219bfd5918f12 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 4 Sep 2019 13:31:59 +0100 Subject: gpu module fixed --- src/Core/regularisers_GPU/PatchSelect_GPU_core.cu | 164 ++++++++++++++-------- 1 file changed, 102 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu index 2cd27ff..fb6fa95 100644 --- a/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu +++ b/src/Core/regularisers_GPU/PatchSelect_GPU_core.cu @@ -40,8 +40,8 @@ */ -#define BLKXSIZE 16 -#define BLKYSIZE 16 +#define BLKXSIZE 8 +#define BLKYSIZE 4 #define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) #define M_PI 3.14159265358979323846 #define EPS 1.0e-8 @@ -68,46 +68,56 @@ __device__ void swapUS(unsigned short *xp, unsigned short *yp) __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE5]; unsigned short ind_i[CONSTVECSIZE5]; unsigned short ind_j[CONSTVECSIZE5]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ @@ -133,46 +143,56 @@ __global__ void IndexSelect2D_5_kernel(float *Ad, unsigned short *H_i_d, unsigne __global__ void IndexSelect2D_7_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE7]; unsigned short ind_i[CONSTVECSIZE7]; unsigned short ind_j[CONSTVECSIZE7]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ @@ -225,16 +245,16 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { i2 = i1 + i_c; i3 = i + i_c; - //if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { j2 = j1 + j_c; j3 = j + j_c; - //if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - // } /*if j2 j3*/ + } /*if j2 j3*/ } - // } /*if i2 i3*/ + } /*if i2 i3*/ } /* writing temporarily into vectors */ if (normsum > EPS) { @@ -271,46 +291,56 @@ __global__ void IndexSelect2D_9_kernel(float *Ad, unsigned short *H_i_d, unsigne __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE11]; unsigned short ind_i[CONSTVECSIZE11]; unsigned short ind_j[CONSTVECSIZE11]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ @@ -335,46 +365,56 @@ __global__ void IndexSelect2D_11_kernel(float *Ad, unsigned short *H_i_d, unsign __global__ void IndexSelect2D_13_kernel(float *Ad, unsigned short *H_i_d, unsigned short *H_j_d, float *Weights_d, float *Eucl_Vec_d, int N, int M, int SearchWindow, int SearchW_full, int SimilarWin, int NumNeighb, float h2) { - long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2; + long i1, j1, i_m, j_m, i_c, j_c, i2, j2, i3, j3, counter, x, y, counterG, index2, ind; float normsum; float Weight_Vec[CONSTVECSIZE13]; unsigned short ind_i[CONSTVECSIZE13]; unsigned short ind_j[CONSTVECSIZE13]; + for(ind=0; ind= 0) && (i1 < N)) { for(j_m=-SearchWindow; j_m<=SearchWindow; j_m++) { - i1 = i+i_m; j1 = j+j_m; - if (((i1 >= 0) && (i1 < N)) && ((j1 >= 0) && (j1 < M))) { + if ((j1 >= 0) && (j1 < M)) { normsum = 0.0f; counterG = 0; for(i_c=-SimilarWin; i_c<=SimilarWin; i_c++) { + i2 = i1 + i_c; + i3 = i + i_c; + if ((i2 >= 0) && (i2 < N) && (i3 >= 0) && (i3 < N)) { for(j_c=-SimilarWin; j_c<=SimilarWin; j_c++) { - i2 = i1 + i_c; j2 = j1 + j_c; - i3 = i + i_c; j3 = j + j_c; - if (((i2 >= 0) && (i2 < N)) && ((j2 >= 0) && (j2 < M))) { - if (((i3 >= 0) && (i3 < N)) && ((j3 >= 0) && (j3 < M))) { - normsum += Eucl_Vec_d[counterG]*powf(Ad[i3*M + j3] - Ad[i2*M + j2], 2); + if ((j2 >= 0) && (j2 < M) && (j3 >= 0) && (j3 < M)) { + normsum += Eucl_Vec_d[counterG]*powf(Ad[i3 + N*j3] - Ad[i2 + N*j2], 2); counterG++; - }} - }} + } /*if j2 j3*/ + } + } /*if i2 i3*/ + } /* writing temporarily into vectors */ if (normsum > EPS) { - Weight_Vec[counter] = __expf(-normsum/h2); + Weight_Vec[counter] = expf(-normsum/h2); ind_i[counter] = i1; ind_j[counter] = j1; counter++; } - } - }} + } /*if j1*/ + } + } /*if i1*/ + } /* do sorting to choose the most prominent weights [HIGH to LOW] */ /* and re-arrange indeces accordingly */ -- cgit v1.2.3