diff options
-rw-r--r-- | Core/regularisers_CPU/TGV_core.c | 434 | ||||
-rw-r--r-- | Core/regularisers_GPU/TGV_GPU_core.cu | 409 | ||||
-rw-r--r-- | Readme.md | 2 | ||||
-rw-r--r-- | Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m | 16 | ||||
-rw-r--r-- | Wrappers/Matlab/demos/demoMatlab_denoise.m | 16 | ||||
-rw-r--r-- | Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp | 32 | ||||
-rw-r--r-- | run.sh | 19 |
7 files changed, 523 insertions, 405 deletions
diff --git a/Core/regularisers_CPU/TGV_core.c b/Core/regularisers_CPU/TGV_core.c index a177e1d..136e0bd 100644 --- a/Core/regularisers_CPU/TGV_core.c +++ b/Core/regularisers_CPU/TGV_core.c @@ -1,25 +1,25 @@ /* -This work is part of the Core Imaging Library developed by -Visual Analytics and Imaging System Group of the Science Technology -Facilities Council, STFC - -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ + * This work is part of the Core Imaging Library developed by + * Visual Analytics and Imaging System Group of the Science Technology + * Facilities Council, STFC + * + * Copyright 2019 Daniil Kazantsev + * Copyright 2019 Srikanth Nagella, Edoardo Pasca + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "TGV_core.h" -/* C-OMP implementation of Primal-Dual denoising method for +/* C-OMP implementation of Primal-Dual denoising method for * Total Generilized Variation (TGV)-L2 model [1] (2D/3D case) * * Input Parameters: @@ -29,44 +29,44 @@ limitations under the License. * 4. parameter to control the second-order term (alpha0) * 5. Number of Chambolle-Pock (Primal-Dual) iterations * 6. Lipshitz constant (default is 12) - * + * * Output: * Filtered/regularised image/volume * * References: * [1] K. Bredies "Total Generalized Variation" - * + * */ - + float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, int iter, float L2, int dimX, int dimY, int dimZ) { - long DimTotal; - int ll; - float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma; - - DimTotal = (long)(dimX*dimY*dimZ); - copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */ - tau = pow(L2,-0.5); - sigma = pow(L2,-0.5); - - /* dual variables */ - P1 = calloc(DimTotal, sizeof(float)); - P2 = calloc(DimTotal, sizeof(float)); + long DimTotal; + int ll; + float *U_old, *P1, *P2, *Q1, *Q2, *Q3, *V1, *V1_old, *V2, *V2_old, tau, sigma; + + DimTotal = (long)(dimX*dimY*dimZ); + copyIm(U0, U, (long)(dimX), (long)(dimY), (long)(dimZ)); /* initialize */ + tau = pow(L2,-0.5); + sigma = pow(L2,-0.5); + + /* dual variables */ + P1 = calloc(DimTotal, sizeof(float)); + P2 = calloc(DimTotal, sizeof(float)); + + Q1 = calloc(DimTotal, sizeof(float)); + Q2 = calloc(DimTotal, sizeof(float)); + Q3 = calloc(DimTotal, sizeof(float)); + + U_old = calloc(DimTotal, sizeof(float)); + + V1 = calloc(DimTotal, sizeof(float)); + V1_old = calloc(DimTotal, sizeof(float)); + V2 = calloc(DimTotal, sizeof(float)); + V2_old = calloc(DimTotal, sizeof(float)); + + if (dimZ == 1) { + /*2D case*/ - Q1 = calloc(DimTotal, sizeof(float)); - Q2 = calloc(DimTotal, sizeof(float)); - Q3 = calloc(DimTotal, sizeof(float)); - - U_old = calloc(DimTotal, sizeof(float)); - - V1 = calloc(DimTotal, sizeof(float)); - V1_old = calloc(DimTotal, sizeof(float)); - V2 = calloc(DimTotal, sizeof(float)); - V2_old = calloc(DimTotal, sizeof(float)); - - if (dimZ == 1) { - /*2D case*/ - /* Primal-dual iterations begin here */ for(ll = 0; ll < iter; ll++) { @@ -102,8 +102,8 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in newU(V1, V1_old, (long)(dimX), (long)(dimY)); newU(V2, V2_old, (long)(dimX), (long)(dimY)); } /*end of iterations*/ - } - else { + } + else { /*3D case*/ float *P3, *Q4, *Q5, *Q6, *V3, *V3_old; @@ -114,7 +114,7 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in V3 = calloc(DimTotal, sizeof(float)); V3_old = calloc(DimTotal, sizeof(float)); - /* Primal-dual iterations begin here */ + /* Primal-dual iterations begin here */ for(ll = 0; ll < iter; ll++) { /* Calculate Dual Variable P */ @@ -145,15 +145,15 @@ float TGV_main(float *U0, float *U, float lambda, float alpha1, float alpha0, in UpdV_3D(V1, V2, V3, P1, P2, P3, Q1, Q2, Q3, Q4, Q5, Q6, (long)(dimX), (long)(dimY), (long)(dimZ), tau); /*get new V*/ - newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ)); - } /*end of iterations*/ + newU3D_3Ar(V1, V2, V3, V1_old, V2_old, V3_old, (long)(dimX), (long)(dimY), (long)(dimZ)); + } /*end of iterations*/ free(P3);free(Q4);free(Q5);free(Q6);free(V3);free(V3_old); - } - + } + /*freeing*/ free(P1);free(P2);free(Q1);free(Q2);free(Q3);free(U_old); free(V1);free(V2);free(V1_old);free(V2_old); - return *U; + return *U; } /********************************************************************/ @@ -166,10 +166,10 @@ float DualP_2D(float *U, float *V1, float *V2, float *P1, float *P2, long dimX, #pragma omp parallel for shared(U,V1,V2,P1,P2) private(i,j,index) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; + index = j*dimX+i; /* symmetric boundary conditions (Neuman) */ - if (i == dimX-1) P1[index] += sigma*(-V1[index]); - else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index]) - V1[index]); + if (i == dimX-1) P1[index] += sigma*(-V1[index]); + else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index]) - V1[index]); if (j == dimY-1) P2[index] += sigma*(-V2[index]); else P2[index] += sigma*((U[(j+1)*dimX+i] - U[index]) - V2[index]); @@ -184,7 +184,7 @@ float ProjP_2D(float *P1, float *P2, long dimX, long dimY, float alpha1) #pragma omp parallel for shared(P1,P2) private(i,j,index,grad_magn) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; + index = j*dimX+i; grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2)))/alpha1; if (grad_magn > 1.0f) { P1[index] /= grad_magn; @@ -201,8 +201,8 @@ float DualQ_2D(float *V1, float *V2, float *Q1, float *Q2, float *Q3, long dimX, #pragma omp parallel for shared(Q1,Q2,Q3,V1,V2) private(i,j,index,q1,q2,q11,q22) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f; + index = j*dimX+i; + q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f; /* boundary conditions (Neuman) */ if (i != dimX-1){ q1 = V1[j*dimX+(i+1)] - V1[index]; @@ -225,7 +225,7 @@ float ProjQ_2D(float *Q1, float *Q2, float *Q3, long dimX, long dimY, float alph #pragma omp parallel for shared(Q1,Q2,Q3) private(i,j,index,grad_magn) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; + index = j*dimX+i; grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2)); grad_magn = grad_magn/alpha0; if (grad_magn > 1.0f) { @@ -244,14 +244,14 @@ float DivProjP_2D(float *U, float *U0, float *P1, float *P2, long dimX, long dim #pragma omp parallel for shared(U,U0,P1,P2) private(i,j,index,P_v1,P_v2,div) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - + index = j*dimX+i; + if (i == 0) P_v1 = P1[index]; - else if (i == dimX) P_v1 = -P1[j*dimX+(i-1)]; + else if (i == dimX-1) P_v1 = -P1[j*dimX+(i-1)]; else P_v1 = P1[index] - P1[j*dimX+(i-1)]; if (j == 0) P_v2 = P2[index]; - else if (j == dimY) P_v2 = -P2[(j-1)*dimX+i]; + else if (j == dimY-1) P_v2 = -P2[(j-1)*dimX+i]; else P_v2 = P2[index] - P2[(j-1)*dimX+i]; div = P_v1 + P_v2; @@ -275,29 +275,28 @@ float UpdV_2D(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, #pragma omp parallel for shared(V1,V2,P1,P2,Q1,Q2,Q3) private(i, j, index, q1, q3_x, q3_y, q2, div1, div2) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - index = j*dimX+i; - q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0; + index = j*dimX+i; /* boundary conditions (Neuman) */ if (i == 0) { - q1 = Q1[index]; - q3_x = Q3[index]; } - else if (i == dimX) { - q1 = -Q1[j*dimX+(i-1)]; - q3_x = -Q3[j*dimX+(i-1)]; } + q1 = Q1[index]; + q3_x = Q3[index]; } + else if (i == dimX-1) { + q1 = -Q1[j*dimX+(i-1)]; + q3_x = -Q3[j*dimX+(i-1)]; } else { - q1 = Q1[index] - Q1[j*dimX+(i-1)]; - q3_x = Q3[index] - Q3[j*dimX+(i-1)]; } + q1 = Q1[index] - Q1[j*dimX+(i-1)]; + q3_x = Q3[index] - Q3[j*dimX+(i-1)]; } if (j == 0) { - q2 = Q2[index]; - q3_y = Q3[index]; } - else if (j == dimY) { - q2 = -Q2[(j-1)*dimX+i]; - q3_y = -Q3[(j-1)*dimX+i]; } + q2 = Q2[index]; + q3_y = Q3[index]; } + else if (j == dimY-1) { + q2 = -Q2[(j-1)*dimX+i]; + q3_y = -Q3[(j-1)*dimX+i]; } else { - q2 = Q2[index] - Q2[(j-1)*dimX+i]; - q3_y = Q3[index] - Q3[(j-1)*dimX+i]; } + q2 = Q2[index] - Q2[(j-1)*dimX+i]; + q3_y = Q3[index] - Q3[(j-1)*dimX+i]; } div1 = q1 + q3_y; @@ -318,16 +317,16 @@ float DualP_3D(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, #pragma omp parallel for shared(U,V1,V2,V3,P1,P2,P3) private(i,j,k,index) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + j*dimX+i; - /* symmetric boundary conditions (Neuman) */ - if (i == dimX-1) P1[index] += sigma*(-V1[index]); - else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]); - if (j == dimY-1) P2[index] += sigma*(-V2[index]); - else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]); - if (k == dimZ-1) P3[index] += sigma*(-V3[index]); - else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]); - }}} + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + /* symmetric boundary conditions (Neuman) */ + if (i == dimX-1) P1[index] += sigma*(-V1[index]); + else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]); + if (j == dimY-1) P2[index] += sigma*(-V2[index]); + else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]); + if (k == dimZ-1) P3[index] += sigma*(-V3[index]); + else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]); + }}} return 1; } /*Projection onto convex set for P*/ @@ -338,15 +337,15 @@ float ProjP_3D(float *P1, float *P2, float *P3, long dimX, long dimY, long dimZ, #pragma omp parallel for shared(P1,P2,P3) private(i,j,k,index,grad_magn) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + j*dimX+i; - grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1; - if (grad_magn > 1.0f) { - P1[index] /= grad_magn; - P2[index] /= grad_magn; - P3[index] /= grad_magn; - } - }}} + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1; + if (grad_magn > 1.0f) { + P1[index] /= grad_magn; + P2[index] /= grad_magn; + P3[index] /= grad_magn; + } + }}} return 1; } /*Calculating dual variable Q (using forward differences)*/ @@ -357,33 +356,33 @@ float DualQ_3D(float *V1, float *V2, float *V3, float *Q1, float *Q2, float *Q3, #pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6,V1,V2,V3) private(i,j,k,index,q1,q2,q3,q11,q22,q33,q44,q55,q66) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + j*dimX+i; - q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f; - /* symmetric boundary conditions (Neuman) */ - if (i != dimX-1){ - q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index]; - q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index]; - q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index]; - } - if (j != dimY-1) { - q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index]; - q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index]; - q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index]; - } - if (k != dimZ-1) { - q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index]; - q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index]; - q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index]; - } - - Q1[index] += sigma*(q1); /*Q11*/ - Q2[index] += sigma*(q2); /*Q22*/ - Q3[index] += sigma*(q3); /*Q33*/ - Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */ - Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */ - Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */ - }}} + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f; + /* symmetric boundary conditions (Neuman) */ + if (i != dimX-1){ + q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index]; + q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index]; + q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index]; + } + if (j != dimY-1) { + q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index]; + q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index]; + q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index]; + } + if (k != dimZ-1) { + q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index]; + q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index]; + q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index]; + } + + Q1[index] += sigma*(q1); /*Q11*/ + Q2[index] += sigma*(q2); /*Q22*/ + Q3[index] += sigma*(q3); /*Q33*/ + Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */ + Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */ + Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */ + }}} return 1; } float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, long dimX, long dimY, long dimZ, float alpha0) @@ -393,19 +392,19 @@ float ProjQ_3D(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, #pragma omp parallel for shared(Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,grad_magn) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + j*dimX+i; - grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2)); - grad_magn = grad_magn/alpha0; - if (grad_magn > 1.0f) { - Q1[index] /= grad_magn; - Q2[index] /= grad_magn; - Q3[index] /= grad_magn; - Q4[index] /= grad_magn; - Q5[index] /= grad_magn; - Q6[index] /= grad_magn; - } - }}} + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2)); + grad_magn = grad_magn/alpha0; + if (grad_magn > 1.0f) { + Q1[index] /= grad_magn; + Q2[index] /= grad_magn; + Q3[index] /= grad_magn; + Q4[index] /= grad_magn; + Q5[index] /= grad_magn; + Q6[index] /= grad_magn; + } + }}} return 1; } /* Divergence and projection for P*/ @@ -416,22 +415,22 @@ float DivProjP_3D(float *U, float *U0, float *P1, float *P2, float *P3, long dim #pragma omp parallel for shared(U,U0,P1,P2,P3) private(i,j,k,index,P_v1,P_v2,P_v3,div) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + j*dimX+i; - - if (i == 0) P_v1 = P1[index]; - else if (i == dimX) P_v1 = -P1[(dimX*dimY)*k + j*dimX+(i-1)]; - else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)]; - if (j == 0) P_v2 = P2[index]; - else if (j == dimY) P_v2 = -P2[(dimX*dimY)*k + (j-1)*dimX+i]; - else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i]; - if (k == 0) P_v3 = P3[index]; - else if (k == dimZ) P_v3 = -P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; - else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; - - div = P_v1 + P_v2 + P_v3; - U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); - }}} + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + + if (i == 0) P_v1 = P1[index]; + else if (i == dimX-1) P_v1 = -P1[(dimX*dimY)*k + j*dimX+(i-1)]; + else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)]; + if (j == 0) P_v2 = P2[index]; + else if (j == dimY-1) P_v2 = -P2[(dimX*dimY)*k + (j-1)*dimX+i]; + else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i]; + if (k == 0) P_v3 = P3[index]; + else if (k == dimZ-1) P_v3 = -P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + + div = P_v1 + P_v2 + P_v3; + U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); + }}} return *U; } /*get update for V*/ @@ -442,69 +441,70 @@ float UpdV_3D(float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, #pragma omp parallel for shared(V1,V2,V3,P1,P2,P3,Q1,Q2,Q3,Q4,Q5,Q6) private(i,j,k,index,q1,q4x,q5x,q2,q4y,q6y,q6z,q5z,q3,div1,div2,div3) for(i=0; i<dimX; i++) { for(j=0; j<dimY; j++) { - for(k=0; k<dimZ; k++) { - index = (dimX*dimY)*k + j*dimX+i; - q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f; - /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/ - /* symmetric boundary conditions (Neuman) */ - - if (i == 0) { - q1 = Q1[index]; - q4x = Q4[index]; - q5x = Q5[index]; } - else if (i == dimX) { - q1 = -Q1[(dimX*dimY)*k + j*dimX+(i-1)]; - q4x = -Q4[(dimX*dimY)*k + j*dimX+(i-1)]; - q5x = -Q5[(dimX*dimY)*k + j*dimX+(i-1)]; } - else { - q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)]; - q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)]; - q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; } - if (j == 0) { - q2 = Q2[index]; - q4y = Q4[index]; - q6y = Q6[index]; } - else if (j == dimY) { - q2 = -Q2[(dimX*dimY)*k + (j-1)*dimX+i]; - q4y = -Q4[(dimX*dimY)*k + (j-1)*dimX+i]; - q6y = -Q6[(dimX*dimY)*k + (j-1)*dimX+i]; } - else { - q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i]; - q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i]; - q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; } - if (k == 0) { - q6z = Q6[index]; - q5z = Q5[index]; - q3 = Q3[index]; } - else if (k == dimZ) { - q6z = -Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; - q5z = -Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; - q3 = -Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; } - else { - q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; - q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; - q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; } - div1 = q1 + q4y + q5z; - div2 = q4x + q2 + q6z; - div3 = q5x + q6y + q3; - - V1[index] += tau*(P1[index] + div1); - V2[index] += tau*(P2[index] + div2); - V3[index] += tau*(P3[index] + div3); - }}} + for(k=0; k<dimZ; k++) { + index = (dimX*dimY)*k + j*dimX+i; + q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f; + /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/ + /* symmetric boundary conditions (Neuman) */ + + if (i == 0) { + q1 = Q1[index]; + q4x = Q4[index]; + q5x = Q5[index]; } + else if (i == dimX-1) { + q1 = -Q1[(dimX*dimY)*k + j*dimX+(i-1)]; + q4x = -Q4[(dimX*dimY)*k + j*dimX+(i-1)]; + q5x = -Q5[(dimX*dimY)*k + j*dimX+(i-1)]; } + else { + q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)]; + q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)]; + q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; } + if (j == 0) { + q2 = Q2[index]; + q4y = Q4[index]; + q6y = Q6[index]; } + else if (j == dimY-1) { + q2 = -Q2[(dimX*dimY)*k + (j-1)*dimX+i]; + q4y = -Q4[(dimX*dimY)*k + (j-1)*dimX+i]; + q6y = -Q6[(dimX*dimY)*k + (j-1)*dimX+i]; } + else { + q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i]; + q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i]; + q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; } + if (k == 0) { + q6z = Q6[index]; + q5z = Q5[index]; + q3 = Q3[index]; } + else if (k == dimZ-1) { + q6z = -Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q5z = -Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q3 = -Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; } + else { + q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; + q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; } + + div1 = q1 + q4y + q5z; + div2 = q4x + q2 + q6z; + div3 = q5x + q6y + q3; + + V1[index] += tau*(P1[index] + div1); + V2[index] += tau*(P2[index] + div2); + V3[index] += tau*(P3[index] + div3); + }}} return 1; } float copyIm_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, float *V3_old, long dimX, long dimY, long dimZ) { - long j; + long j; #pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(j) - for (j = 0; j<dimX*dimY*dimZ; j++) { - V1_old[j] = V1[j]; - V2_old[j] = V2[j]; - V3_old[j] = V3[j]; - } - return 1; + for (j = 0; j<dimX*dimY*dimZ; j++) { + V1_old[j] = V1[j]; + V2_old[j] = V2[j]; + V3_old[j] = V3[j]; + } + return 1; } /*get updated solution U*/ @@ -523,9 +523,9 @@ float newU3D_3Ar(float *V1, float *V2, float *V3, float *V1_old, float *V2_old, long i; #pragma omp parallel for shared(V1, V2, V3, V1_old, V2_old, V3_old) private(i) for(i=0; i<dimX*dimY*dimZ; i++) { - V1[i] = 2.0f*V1[i] - V1_old[i]; - V2[i] = 2.0f*V2[i] - V2_old[i]; - V3[i] = 2.0f*V3[i] - V3_old[i]; + V1[i] = 2.0f*V1[i] - V1_old[i]; + V2[i] = 2.0f*V2[i] - V2_old[i]; + V3[i] = 2.0f*V3[i] - V3_old[i]; } return 1; } diff --git a/Core/regularisers_GPU/TGV_GPU_core.cu b/Core/regularisers_GPU/TGV_GPU_core.cu index 58b2c41..e4abf72 100644 --- a/Core/regularisers_GPU/TGV_GPU_core.cu +++ b/Core/regularisers_GPU/TGV_GPU_core.cu @@ -3,8 +3,8 @@ This work is part of the Core Imaging Library developed by Visual Analytics and Imaging System Group of the Science Technology Facilities Council, STFC -Copyright 2017 Daniil Kazantsev -Copyright 2017 Srikanth Nagella, Edoardo Pasca +Copyright 2019 Daniil Kazantsev +Copyright 2019 Srikanth Nagella, Edoardo Pasca Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ limitations under the License. * 6. Lipshitz constant (default is 12) * * Output: - * Filtered/regulariaed image + * Filtered/regularised image * * References: * [1] K. Bredies "Total Generalized Variation" @@ -42,8 +42,8 @@ limitations under the License. #define BLKYSIZE 8 #define BLKZSIZE 8 -#define BLKXSIZE2D 16 -#define BLKYSIZE2D 16 +#define BLKXSIZE2D 8 +#define BLKYSIZE2D 8 #define EPS 1.0e-7 #define idivup(a, b) ( ((a)%(b) != 0) ? (a)/(b)+1 : (a)/(b) ) @@ -53,80 +53,84 @@ limitations under the License. /********************************************************************/ __global__ void DualP_2D_kernel(float *U, float *V1, float *V2, float *P1, float *P2, int dimX, int dimY, float sigma) { - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; + int num_total = dimX*dimY; + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; - int index = i + dimX*j; + int index = i + dimX*j; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - /* symmetric boundary conditions (Neuman) */ - if (i == dimX-1) P1[index] += sigma*((U[j*dimX+(i-1)] - U[index]) - V1[index]); - else P1[index] += sigma*((U[j*dimX+(i+1)] - U[index]) - V1[index]); - if (j == dimY-1) P2[index] += sigma*((U[(j-1)*dimX+i] - U[index]) - V2[index]); - else P2[index] += sigma*((U[(j+1)*dimX+i] - U[index]) - V2[index]); - } + if (index < num_total) { + /* symmetric boundary conditions (Neuman) */ + if ((i >= 0) && (i < dimX-1)) P1[index] += sigma*((U[(i+1) + dimX*j] - U[index]) - V1[index]); + else P1[index] += sigma*(-V1[index]); + if ((j >= 0) && (j < dimY-1)) P2[index] += sigma*((U[i + dimX*(j+1)] - U[index]) - V2[index]); + else P2[index] += sigma*(-V2[index]); + } return; } __global__ void ProjP_2D_kernel(float *P1, float *P2, int dimX, int dimY, float alpha1) { float grad_magn; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; + int num_total = dimX*dimY; + + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; - - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - - grad_magn = sqrt(pow(P1[index],2) + pow(P2[index],2)); + + if (index < num_total) { + grad_magn = sqrtf(pow(P1[index],2) + pow(P2[index],2)); grad_magn = grad_magn/alpha1; if (grad_magn > 1.0f) { P1[index] /= grad_magn; P2[index] /= grad_magn; } - } + } return; } __global__ void DualQ_2D_kernel(float *V1, float *V2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float sigma) { float q1, q2, q11, q22; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; + int num_total = dimX*dimY; + + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; - int index = i + dimX*j; + int index = i + dimX*j; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - /* symmetric boundary conditions (Neuman) */ - q1 = 0.0f; q11 = 0.0f; q2 = 0.0f; q22 = 0.0f; - /* boundary conditions (Neuman) */ - if (i != dimX-1){ - q1 = V1[j*dimX+(i+1)] - V1[index]; - q11 = V2[j*dimX+(i+1)] - V2[index]; - } - if (j != dimY-1) { - q2 = V2[(j+1)*dimX+i] - V2[index]; - q22 = V1[(j+1)*dimX+i] - V1[index]; - } + if (index < num_total) { + q1 = 0.0f; q2 = 0.0f; q11 = 0.0f; q22 = 0.0f; + + if ((i >= 0) && (i < dimX-1)) { + /* boundary conditions (Neuman) */ + q1 = V1[(i+1) + dimX*j] - V1[index]; + q11 = V2[(i+1) + dimX*j] - V2[index]; + } + if ((j >= 0) && (j < dimY-1)) { + q2 = V2[i + dimX*(j+1)] - V2[index]; + q22 = V1[i + dimX*(j+1)] - V1[index]; + } + Q1[index] += sigma*(q1); Q2[index] += sigma*(q2); Q3[index] += sigma*(0.5f*(q11 + q22)); - } + } return; } __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int dimY, float alpha0) { float grad_magn; + int num_total = dimX*dimY; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + dimX*j; + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { + int index = i + dimX*j; + + if (index < num_total) { grad_magn = sqrt(pow(Q1[index],2) + pow(Q2[index],2) + 2*pow(Q3[index],2)); grad_magn = grad_magn/alpha0; if (grad_magn > 1.0f) { @@ -141,44 +145,75 @@ __global__ void ProjQ_2D_kernel(float *Q1, float *Q2, float *Q3, int dimX, int d __global__ void DivProjP_2D_kernel(float *U, float *U0, float *P1, float *P2, int dimX, int dimY, float lambda, float tau) { float P_v1, P_v2, div; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; + int num_total = dimX*dimY; + + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; int index = i + dimX*j; + + if (index < num_total) { + P_v1 = 0.0f; P_v2 = 0.0f; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - - if (i == 0) P_v1 = P1[index]; - else P_v1 = P1[index] - P1[j*dimX+(i-1)]; - if (j == 0) P_v2 = P2[index]; - else P_v2 = P2[index] - P2[(j-1)*dimX+i]; - div = P_v1 + P_v2; - U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); - } + if (i == 0) P_v1 = P1[index]; + if (i == dimX-1) P_v1 = -P1[(i-1) + dimX*j]; + if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[(i-1) + dimX*j]; + + if (j == 0) P_v2 = P2[index]; + if (j == dimY-1) P_v2 = -P2[i + dimX*(j-1)]; + if ((j > 0) && (j < dimY-1)) P_v2 = P2[index] - P2[i + dimX*(j-1)]; + + div = P_v1 + P_v2; + U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); + } return; } __global__ void UpdV_2D_kernel(float *V1, float *V2, float *P1, float *P2, float *Q1, float *Q2, float *Q3, int dimX, int dimY, float tau) { float q1, q3_x, q2, q3_y, div1, div2; - - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - - int index = i + dimX*j; + int num_total = dimX*dimY; + int i1, j1; + + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY)) { - q2 = 0.0f; q3_y = 0.0f; q1 = 0.0f; q3_x = 0.0; - /* boundary conditions (Neuman) */ - if (i != 0) { - q1 = Q1[index] - Q1[j*dimX+(i-1)]; - q3_x = Q3[index] - Q3[j*dimX+(i-1)]; - } - if (j != 0) { - q2 = Q2[index] - Q2[(j-1)*dimX+i]; - q3_y = Q3[index] - Q3[(j-1)*dimX+i]; - } + int index = i + dimX*j; + + if (index < num_total) { + + i1 = (i-1) + dimX*j; + j1 = (i) + dimX*(j-1); + + /* boundary conditions (Neuman) */ + if ((i > 0) && (i < dimX-1)) { + q1 = Q1[index] - Q1[i1]; + q3_x = Q3[index] - Q3[i1]; } + else if (i == 0) { + q1 = Q1[index]; + q3_x = Q3[index]; } + else if (i == dimX-1) { + q1 = -Q1[i1]; + q3_x = -Q3[i1]; } + else { + q1 = 0.0f; + q3_x = 0.0f; + } + + if ((j > 0) && (j < dimY-1)) { + q2 = Q2[index] - Q2[j1]; + q3_y = Q3[index] - Q3[j1]; } + else if (j == dimY-1) { + q2 = -Q2[j1]; + q3_y = -Q3[j1]; } + else if (j == 0) { + q2 = Q2[index]; + q3_y = Q3[index]; } + else { + q2 = 0.0f; + q3_y = 0.0f; + } + div1 = q1 + q3_y; div2 = q3_x + q2; V1[index] += tau*(P1[index] + div1); @@ -243,21 +278,22 @@ __global__ void newU_kernel_ar2(float *V1, float *V2, float *V1_old, float *V2_o __global__ void DualP_3D_kernel(float *U, float *V1, float *V2, float *V3, float *P1, float *P2, float *P3, int dimX, int dimY, int dimZ, float sigma) { int index; - int i = blockDim.x * blockIdx.x + threadIdx.x; - int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; + const int i = blockDim.x * blockIdx.x + threadIdx.x; + const int j = blockDim.y * blockIdx.y + threadIdx.y; + const int k = blockDim.z * blockIdx.z + threadIdx.z; + + int num_total = dimX*dimY*dimZ; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - index = (dimX*dimY)*k + j*dimX+i; - /* symmetric boundary conditions (Neuman) */ - if (i == dimX-1) P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i-1)] - U[index]) - V1[index]); - else P1[index] += sigma*((U[(dimX*dimY)*k + j*dimX+(i+1)] - U[index]) - V1[index]); - if (j == dimY-1) P2[index] += sigma*((U[(dimX*dimY)*k + (j-1)*dimX+i] - U[index]) - V2[index]); - else P2[index] += sigma*((U[(dimX*dimY)*k + (j+1)*dimX+i] - U[index]) - V2[index]); - if (k == dimZ-1) P3[index] += sigma*((U[(dimX*dimY)*(k-1) + j*dimX+i] - U[index]) - V3[index]); - else P3[index] += sigma*((U[(dimX*dimY)*(k+1) + j*dimX+i] - U[index]) - V3[index]); - } + index = (dimX*dimY)*k + i*dimX+j; + if (index < num_total) { + /* symmetric boundary conditions (Neuman) */ + if ((i >= 0) && (i < dimX-1)) P1[index] += sigma*((U[(dimX*dimY)*k + (i+1)*dimX+j] - U[index]) - V1[index]); + else P1[index] += sigma*(-V1[index]); + if ((j >= 0) && (j < dimY-1)) P2[index] += sigma*((U[(dimX*dimY)*k + i*dimX+(j+1)] - U[index]) - V2[index]); + else P2[index] += sigma*(-V2[index]); + if ((k >= 0) && (k < dimZ-1)) P3[index] += sigma*((U[(dimX*dimY)*(k+1) + i*dimX+(j)] - U[index]) - V3[index]); + else P3[index] += sigma*(-V3[index]); + } return; } @@ -265,14 +301,14 @@ __global__ void ProjP_3D_kernel(float *P1, float *P2, float *P3, int dimX, int d { float grad_magn; int index; + int num_total = dimX*dimY*dimZ; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int k = blockDim.z * blockIdx.z + threadIdx.z; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - index = (dimX*dimY)*k + j*dimX+i; - + index = (dimX*dimY)*k + i*dimX+j; + if (index < num_total) { grad_magn = (sqrtf(pow(P1[index],2) + pow(P2[index],2) + pow(P3[index],2)))/alpha1; if (grad_magn > 1.0f) { P1[index] /= grad_magn; @@ -288,55 +324,57 @@ __global__ void DualQ_3D_kernel(float *V1, float *V2, float *V3, float *Q1, floa int index; float q1, q2, q3, q11, q22, q33, q44, q55, q66; + int num_total = dimX*dimY*dimZ; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int k = blockDim.z * blockIdx.z + threadIdx.z; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - index = (dimX*dimY)*k + j*dimX+i; - q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f; - /* symmetric boundary conditions (Neuman) */ - if (i != dimX-1){ - q1 = V1[(dimX*dimY)*k + j*dimX+(i+1)] - V1[index]; - q11 = V2[(dimX*dimY)*k + j*dimX+(i+1)] - V2[index]; - q33 = V3[(dimX*dimY)*k + j*dimX+(i+1)] - V3[index]; - } - if (j != dimY-1) { - q2 = V2[(dimX*dimY)*k + (j+1)*dimX+i] - V2[index]; - q22 = V1[(dimX*dimY)*k + (j+1)*dimX+i] - V1[index]; - q55 = V3[(dimX*dimY)*k + (j+1)*dimX+i] - V3[index]; - } - if (k != dimZ-1) { - q3 = V3[(dimX*dimY)*(k+1) + j*dimX+i] - V3[index]; - q44 = V1[(dimX*dimY)*(k+1) + j*dimX+i] - V1[index]; - q66 = V2[(dimX*dimY)*(k+1) + j*dimX+i] - V2[index]; - } - + index = (dimX*dimY)*k + i*dimX+j; + int i1 = (dimX*dimY)*k + (i+1)*dimX+j; + int j1 = (dimX*dimY)*k + (i)*dimX+(j+1); + int k1 = (dimX*dimY)*(k+1) + (i)*dimX+(j); + + if (index < num_total) { + q1 = 0.0f; q11 = 0.0f; q33 = 0.0f; q2 = 0.0f; q22 = 0.0f; q55 = 0.0f; q3 = 0.0f; q44 = 0.0f; q66 = 0.0f; + + /* boundary conditions (Neuman) */ + if ((i >= 0) && (i < dimX-1)) { + q1 = V1[i1] - V1[index]; + q11 = V2[i1] - V2[index]; + q33 = V3[i1] - V3[index]; } + if ((j >= 0) && (j < dimY-1)) { + q2 = V2[j1] - V2[index]; + q22 = V1[j1] - V1[index]; + q55 = V3[j1] - V3[index]; } + if ((k >= 0) && (k < dimZ-1)) { + q3 = V3[k1] - V3[index]; + q44 = V1[k1] - V1[index]; + q66 = V2[k1] - V2[index]; } + Q1[index] += sigma*(q1); /*Q11*/ Q2[index] += sigma*(q2); /*Q22*/ Q3[index] += sigma*(q3); /*Q33*/ Q4[index] += sigma*(0.5f*(q11 + q22)); /* Q21 / Q12 */ Q5[index] += sigma*(0.5f*(q33 + q44)); /* Q31 / Q13 */ Q6[index] += sigma*(0.5f*(q55 + q66)); /* Q32 / Q23 */ - } + } return; } - __global__ void ProjQ_3D_kernel(float *Q1, float *Q2, float *Q3, float *Q4, float *Q5, float *Q6, int dimX, int dimY, int dimZ, float alpha0) { float grad_magn; int index; - + int num_total = dimX*dimY*dimZ; + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + index = (dimX*dimY)*k + i*dimX+j; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - index = (dimX*dimY)*k + j*dimX+i; - + if (index < num_total) { grad_magn = sqrtf(pow(Q1[index],2) + pow(Q2[index],2) + pow(Q3[index],2) + 2.0f*pow(Q4[index],2) + 2.0f*pow(Q5[index],2) + 2.0f*pow(Q6[index],2)); grad_magn = grad_magn/alpha0; if (grad_magn > 1.0f) { @@ -354,21 +392,33 @@ __global__ void DivProjP_3D_kernel(float *U, float *U0, float *P1, float *P2, fl { float P_v1, P_v2, P_v3, div; int index; - + int num_total = dimX*dimY*dimZ; + + int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; - int k = blockDim.z * blockIdx.z + threadIdx.z; + int k = blockDim.z * blockIdx.z + threadIdx.z; + + index = (dimX*dimY)*k + i*dimX+j; + int i1 = (dimX*dimY)*k + (i-1)*dimX+j; + int j1 = (dimX*dimY)*k + (i)*dimX+(j-1); + int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j); + + if (index < num_total) { + P_v1 = 0.0f; P_v2 = 0.0f; P_v3 = 0.0f; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - index = (dimX*dimY)*k + j*dimX+i; - if (i == 0) P_v1 = P1[index]; - else P_v1 = P1[index] - P1[(dimX*dimY)*k + j*dimX+(i-1)]; + if (i == dimX-1) P_v1 = -P1[i1]; + if ((i > 0) && (i < dimX-1)) P_v1 = P1[index] - P1[i1]; + if (j == 0) P_v2 = P2[index]; - else P_v2 = P2[index] - P2[(dimX*dimY)*k + (j-1)*dimX+i]; + if (j == dimY-1) P_v2 = -P2[j1]; + if ((j > 0) && (j < dimY-1)) P_v2 = P2[index] - P2[j1]; + if (k == 0) P_v3 = P3[index]; - else P_v3 = P3[index] - P3[(dimX*dimY)*(k-1) + (j)*dimX+i]; + if (k == dimZ-1) P_v3 = -P3[k1]; + if ((k > 0) && (k < dimZ-1)) P_v3 = P3[index] - P3[k1]; + div = P_v1 + P_v2 + P_v3; U[index] = (lambda*(U[index] + tau*div) + tau*U0[index])/(lambda + tau); @@ -379,33 +429,73 @@ __global__ void UpdV_3D_kernel(float *V1, float *V2, float *V3, float *P1, float { float q1, q4x, q5x, q2, q4y, q6y, q6z, q5z, q3, div1, div2, div3; int index; + int num_total = dimX*dimY*dimZ; int i = blockDim.x * blockIdx.x + threadIdx.x; int j = blockDim.y * blockIdx.y + threadIdx.y; int k = blockDim.z * blockIdx.z + threadIdx.z; - if ((i >= 0) && (i < dimX) && (j >= 0) && (j < dimY) && (k >= 0) && (k < dimZ)) { - - index = (dimX*dimY)*k + j*dimX+i; + index = (dimX*dimY)*k + i*dimX+j; + int i1 = (dimX*dimY)*k + (i-1)*dimX+j; + int j1 = (dimX*dimY)*k + (i)*dimX+(j-1); + int k1 = (dimX*dimY)*(k-1) + (i)*dimX+(j); - q1 = 0.0f; q4x= 0.0f; q5x= 0.0f; q2= 0.0f; q4y= 0.0f; q6y= 0.0f; q6z= 0.0f; q5z= 0.0f; q3= 0.0f; - /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/ - /* symmetric boundary conditions (Neuman) */ - if (i != 0) { - q1 = Q1[index] - Q1[(dimX*dimY)*k + j*dimX+(i-1)]; - q4x = Q4[index] - Q4[(dimX*dimY)*k + j*dimX+(i-1)]; - q5x = Q5[index] - Q5[(dimX*dimY)*k + j*dimX+(i-1)]; - } - if (j != 0) { - q2 = Q2[index] - Q2[(dimX*dimY)*k + (j-1)*dimX+i]; - q4y = Q4[index] - Q4[(dimX*dimY)*k + (j-1)*dimX+i]; - q6y = Q6[index] - Q6[(dimX*dimY)*k + (j-1)*dimX+i]; - } - if (k != 0) { - q6z = Q6[index] - Q6[(dimX*dimY)*(k-1) + (j)*dimX+i]; - q5z = Q5[index] - Q5[(dimX*dimY)*(k-1) + (j)*dimX+i]; - q3 = Q3[index] - Q3[(dimX*dimY)*(k-1) + (j)*dimX+i]; - } + /* Q1 - Q11, Q2 - Q22, Q3 - Q33, Q4 - Q21/Q12, Q5 - Q31/Q13, Q6 - Q32/Q23*/ + if (index < num_total) { + + /* boundary conditions (Neuman) */ + if ((i > 0) && (i < dimX-1)) { + q1 = Q1[index] - Q1[i1]; + q4x = Q4[index] - Q4[i1]; + q5x = Q5[index] - Q5[i1]; } + else if (i == 0) { + q1 = Q1[index]; + q4x = Q4[index]; + q5x = Q5[index]; } + else if (i == dimX-1) { + q1 = -Q1[i1]; + q4x = -Q4[i1]; + q5x = -Q5[i1]; } + else { + q1 = 0.0f; + q4x = 0.0f; + q5x = 0.0f; } + + if ((j > 0) && (j < dimY-1)) { + q2 = Q2[index] - Q2[j1]; + q4y = Q4[index] - Q4[j1]; + q6y = Q6[index] - Q6[j1]; } + else if (j == dimY-1) { + q2 = -Q2[j1]; + q4y = -Q4[j1]; + q6y = -Q6[j1]; } + else if (j == 0) { + q2 = Q2[index]; + q4y = Q4[index]; + q6y = Q6[index]; } + else { + q2 = 0.0f; + q4y = 0.0f; + q6y = 0.0f; + } + + if ((k > 0) && (k < dimZ-1)) { + q6z = Q6[index] - Q6[k1]; + q5z = Q5[index] - Q5[k1]; + q3 = Q3[index] - Q3[k1]; } + else if (k == dimZ-1) { + q6z = -Q6[k1]; + q5z = -Q5[k1]; + q3 = -Q3[k1]; } + else if (k == 0) { + q6z = Q6[index]; + q5z = Q5[index]; + q3 = Q3[index]; } + else { + q6z = 0.0f; + q5z = 0.0f; + q3 = 0.0f; } + div1 = q1 + q4y + q5z; div2 = q4x + q2 + q6z; div3 = q5x + q6y + q3; @@ -510,7 +600,14 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo CHECK(cudaMalloc((void**)&V2_old,dimTotal*sizeof(float))); CHECK(cudaMemcpy(d_U0,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(d_U,U0,dimTotal*sizeof(float),cudaMemcpyHostToDevice)); + cudaMemset(P1, 0, dimTotal*sizeof(float)); + cudaMemset(P2, 0, dimTotal*sizeof(float)); + cudaMemset(Q1, 0, dimTotal*sizeof(float)); + cudaMemset(Q2, 0, dimTotal*sizeof(float)); + cudaMemset(Q3, 0, dimTotal*sizeof(float)); + cudaMemset(V1, 0, dimTotal*sizeof(float)); + cudaMemset(V2, 0, dimTotal*sizeof(float)); if (dimZ == 1) { /*2D case */ @@ -521,14 +618,14 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo /* Calculate Dual Variable P */ DualP_2D_kernel<<<dimGrid,dimBlock>>>(d_U, V1, V2, P1, P2, dimX, dimY, sigma); - CHECK(cudaDeviceSynchronize()); + CHECK(cudaDeviceSynchronize()); /*Projection onto convex set for P*/ ProjP_2D_kernel<<<dimGrid,dimBlock>>>(P1, P2, dimX, dimY, alpha1); CHECK(cudaDeviceSynchronize()); /* Calculate Dual Variable Q */ DualQ_2D_kernel<<<dimGrid,dimBlock>>>(V1, V2, Q1, Q2, Q3, dimX, dimY, sigma); CHECK(cudaDeviceSynchronize()); - /*Projection onto convex set for Q*/ + /*Projection onto convex set for Q*/ ProjQ_2D_kernel<<<dimGrid,dimBlock>>>(Q1, Q2, Q3, dimX, dimY, alpha0); CHECK(cudaDeviceSynchronize()); /*saving U into U_old*/ @@ -549,7 +646,7 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo /*get new V*/ newU_kernel_ar2<<<dimGrid,dimBlock>>>(V1, V2, V1_old, V2_old, dimX, dimY, dimTotal); CHECK(cudaDeviceSynchronize()); - } + } } else { /*3D case */ @@ -565,6 +662,12 @@ extern "C" int TGV_GPU_main(float *U0, float *U, float lambda, float alpha1, flo CHECK(cudaMalloc((void**)&V3,dimTotal*sizeof(float))); CHECK(cudaMalloc((void**)&V3_old,dimTotal*sizeof(float))); + cudaMemset(Q4, 0.0f, dimTotal*sizeof(float)); + cudaMemset(Q5, 0.0f, dimTotal*sizeof(float)); + cudaMemset(Q6, 0.0f, dimTotal*sizeof(float)); + cudaMemset(P3, 0.0f, dimTotal*sizeof(float)); + cudaMemset(V3, 0.0f, dimTotal*sizeof(float)); + for(int n=0; n < iterationsNumb; n++) { /* Calculate Dual Variable P */ @@ -93,7 +93,7 @@ conda install ccpi-regulariser -c ccpi -c conda-forge #### Python (conda-build) ``` - export CIL_VERSION=0.10.4 + export CIL_VERSION=19.02 conda build Wrappers/Python/conda-recipe --numpy 1.12 --python 3.5 conda install ccpi-regulariser=${CIL_VERSION} --use-local --force cd demos/ diff --git a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m index 0c331a4..ac8e1ba 100644 --- a/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_3Ddenoise.m @@ -8,7 +8,7 @@ addpath(Path2); addpath(Path3); N = 512; -slices = 7; +slices = 15; vol3D = zeros(N,N,slices, 'single'); Ideal3D = zeros(N,N,slices, 'single'); Im = double(imread('lena_gray_512.tif'))/255; % loading image @@ -17,9 +17,7 @@ vol3D(:,:,i) = Im + .05*randn(size(Im)); Ideal3D(:,:,i) = Im; end vol3D(vol3D < 0) = 0; -figure; imshow(vol3D(:,:,15), [0 1]); title('Noisy image'); - - +figure; imshow(vol3D(:,:,7), [0 1]); title('Noisy image'); lambda_reg = 0.03; % regularsation parameter for all methods %% fprintf('Denoise a volume using the ROF-TV model (CPU) \n'); @@ -143,6 +141,16 @@ rmseTGV = RMSE(Ideal3D(:),u_tgv(:)); fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV); figure; imshow(u_tgv(:,:,3), [0 1]); title('TGV denoised volume (CPU)'); %% +% fprintf('Denoise using the TGV model (GPU) \n'); +% lambda_TGV = 0.03; % regularisation parameter +% alpha1 = 1.0; % parameter to control the first-order term +% alpha0 = 2.0; % parameter to control the second-order term +% iter_TGV = 500; % number of Primal-Dual iterations for TGV +% tic; u_tgv_gpu = TGV_GPU(single(vol3D), lambda_TGV, alpha1, alpha0, iter_TGV); toc; +% rmseTGV = RMSE(Ideal3D(:),u_tgv_gpu(:)); +% fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV); +% figure; imshow(u_tgv_gpu(:,:,3), [0 1]); title('TGV denoised volume (GPU)'); +%% %>>>>>>>>>>>>>> MULTI-CHANNEL priors <<<<<<<<<<<<<<< % fprintf('Denoise a volume using the FGP-dTV model (CPU) \n'); diff --git a/Wrappers/Matlab/demos/demoMatlab_denoise.m b/Wrappers/Matlab/demos/demoMatlab_denoise.m index 14d3096..62e5834 100644 --- a/Wrappers/Matlab/demos/demoMatlab_denoise.m +++ b/Wrappers/Matlab/demos/demoMatlab_denoise.m @@ -5,7 +5,9 @@ fsep = '/'; Path1 = sprintf(['..' fsep 'mex_compile' fsep 'installed'], 1i); Path2 = sprintf(['..' fsep '..' fsep '..' fsep 'data' fsep], 1i); Path3 = sprintf(['..' fsep 'supp'], 1i); -addpath(Path1); addpath(Path2); addpath(Path3); +addpath(Path1); +addpath(Path2); +addpath(Path3); Im = double(imread('lena_gray_512.tif'))/255; % loading image u0 = Im + .05*randn(size(Im)); u0(u0 < 0) = 0; @@ -29,7 +31,7 @@ figure; imshow(u_rof, [0 1]); title('ROF-TV denoised image (CPU)'); % figure; imshow(u_rofG, [0 1]); title('ROF-TV denoised image (GPU)'); %% fprintf('Denoise using the FGP-TV model (CPU) \n'); -iter_fgp = 1000; % number of FGP iterations +iter_fgp = 1300; % number of FGP iterations epsil_tol = 1.0e-06; % tolerance tic; u_fgp = FGP_TV(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; energyfunc_val_fgp = TV_energy(single(u_fgp),single(u0),lambda_reg, 1); % get energy function value @@ -39,8 +41,8 @@ figure; imshow(u_fgp, [0 1]); title('FGP-TV denoised image (CPU)'); %% % fprintf('Denoise using the FGP-TV model (GPU) \n'); -% iter_fgp = 1000; % number of FGP iterations -% epsil_tol = 1.0e-05; % tolerance +% iter_fgp = 1300; % number of FGP iterations +% epsil_tol = 1.0e-06; % tolerance % tic; u_fgpG = FGP_TV_GPU(single(u0), lambda_reg, iter_fgp, epsil_tol); toc; % figure; imshow(u_fgpG, [0 1]); title('FGP-TV denoised image (GPU)'); %% @@ -63,17 +65,17 @@ fprintf('Denoise using the TGV model (CPU) \n'); lambda_TGV = 0.045; % regularisation parameter alpha1 = 1.0; % parameter to control the first-order term alpha0 = 2.0; % parameter to control the second-order term -iter_TGV = 2000; % number of Primal-Dual iterations for TGV +iter_TGV = 1500; % number of Primal-Dual iterations for TGV tic; u_tgv = TGV(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; rmseTGV = (RMSE(u_tgv(:),Im(:))); fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV); figure; imshow(u_tgv, [0 1]); title('TGV denoised image (CPU)'); -%% + % fprintf('Denoise using the TGV model (GPU) \n'); % lambda_TGV = 0.045; % regularisation parameter % alpha1 = 1.0; % parameter to control the first-order term % alpha0 = 2.0; % parameter to control the second-order term -% iter_TGV = 2000; % number of Primal-Dual iterations for TGV +% iter_TGV = 1500; % number of Primal-Dual iterations for TGV % tic; u_tgv_gpu = TGV_GPU(single(u0), lambda_TGV, alpha1, alpha0, iter_TGV); toc; % rmseTGV_gpu = (RMSE(u_tgv_gpu(:),Im(:))); % fprintf('%s %f \n', 'RMSE error for TGV is:', rmseTGV_gpu); diff --git a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp index edb551d..1173282 100644 --- a/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp +++ b/Wrappers/Matlab/mex_compile/regularisers_GPU/TGV_GPU.cpp @@ -21,18 +21,18 @@ limitations under the License. #include "TGV_GPU_core.h" /* CUDA implementation of Primal-Dual denoising method for - * Total Generilized Variation (TGV)-L2 model [1] (2D case only) + * Total Generilized Variation (TGV)-L2 model [1] (2D/3D) * * Input Parameters: - * 1. Noisy image (2D) (required) - * 2. lambda - regularisation parameter (required) - * 3. parameter to control the first-order term (alpha1) (default - 1) - * 4. parameter to control the second-order term (alpha0) (default - 0.5) - * 5. Number of Chambolle-Pock (Primal-Dual) iterations (default is 300) + * 1. Noisy image/volume (2D/3D) + * 2. lambda - regularisation parameter + * 3. parameter to control the first-order term (alpha1) + * 4. parameter to control the second-order term (alpha0) + * 5. Number of Chambolle-Pock (Primal-Dual) iterations * 6. Lipshitz constant (default is 12) * * Output: - * Filtered/regulariaed image + * Filtered/regularised image * * References: * [1] K. Bredies "Total Generalized Variation" @@ -44,7 +44,7 @@ void mexFunction( { int number_of_dims, iter; - mwSize dimX, dimY; + mwSize dimX, dimY, dimZ; const mwSize *dim_array; float *Input, *Output=NULL, lambda, alpha0, alpha1, L2; @@ -57,8 +57,8 @@ void mexFunction( Input = (float *) mxGetData(prhs[0]); /*noisy image (2D) */ lambda = (float) mxGetScalar(prhs[1]); /* regularisation parameter */ alpha1 = 1.0f; /* parameter to control the first-order term */ - alpha0 = 0.5f; /* parameter to control the second-order term */ - iter = 300; /* Iterations number */ + alpha0 = 2.0f; /* parameter to control the second-order term */ + iter = 500; /* Iterations number */ L2 = 12.0f; /* Lipshitz constant */ if (mxGetClassID(prhs[0]) != mxSINGLE_CLASS) {mexErrMsgTxt("The input image must be in a single precision"); } @@ -68,12 +68,14 @@ void mexFunction( if (nrhs == 6) L2 = (float) mxGetScalar(prhs[5]); /* Lipshitz constant */ /*Handling Matlab output data*/ - dimX = dim_array[0]; dimY = dim_array[1]; + dimX = dim_array[0]; dimY = dim_array[1]; dimZ = dim_array[2]; if (number_of_dims == 2) { - Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); - /* running the function */ - TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY); + dimZ = 1; /*2D case*/ + Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(2, dim_array, mxSINGLE_CLASS, mxREAL)); } - if (number_of_dims == 3) {mexErrMsgTxt("Only 2D images accepted");} + if (number_of_dims == 3) Output = (float*)mxGetPr(plhs[0] = mxCreateNumericArray(3, dim_array, mxSINGLE_CLASS, mxREAL)); + + /* running the function */ + TGV_GPU_main(Input, Output, lambda, alpha1, alpha0, iter, L2, dimX, dimY, dimZ); } @@ -1,19 +1,22 @@ #!/bin/bash echo "Building CCPi-regularisation Toolkit using CMake" -# rm -r build +rm -r build # Requires Cython, install it first: # pip install cython -# mkdir build +mkdir build cd build/ make clean -# install Python modules only without CUDA -cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install -# install Python modules only with CUDA +# install Python modules without CUDA +#cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install +# install Python modules with CUDA # cmake ../ -DBUILD_PYTHON_WRAPPER=ON -DBUILD_MATLAB_WRAPPER=OFF -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install +# install Matlab modules with CUDA +cmake ../ -DBUILD_PYTHON_WRAPPER=OFF -DMatlab_ROOT_DIR=/dls_sw/apps/matlab/r2014a/ -DBUILD_MATLAB_WRAPPER=ON -DBUILD_CUDA=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./install make install # cp install/lib/libcilreg.so install/python/ccpi/filters -cd install/python -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib +#cd install/python +#export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib # spyder # one can also run Matlab in Linux as: -# PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" matlab +#PATH="/path/to/mex/:$PATH" LD_LIBRARY_PATH="/path/to/library:$LD_LIBRARY_PATH" matlab +PATH="/home/kjy41806/Documents/SOFT/CCPi-Regularisation-Toolkit/build/install/matlab/:$PATH" LD_LIBRARY_PATH="/home/kjy41806/Documents/SOFT/CCPi-Regularisation-Toolkit/build/install/lib:$LD_LIBRARY_PATH" matlab |