summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorGemma Fardell <47746591+gfardell@users.noreply.github.com>2020-01-23 11:52:46 +0000
committerEdoardo Pasca <edo.paskino@gmail.com>2020-01-23 11:52:46 +0000
commit894f35c9be404bc2c13f90f4a6184a545029181a (patch)
tree17938025fe60afac7006b8fb6c4e7016bf687ccd /src
parentfecff8ded735d309aba43d30226c0bb51386c905 (diff)
downloadframework-894f35c9be404bc2c13f90f4a6184a545029181a.tar.gz
framework-894f35c9be404bc2c13f90f4a6184a545029181a.tar.bz2
framework-894f35c9be404bc2c13f90f4a6184a545029181a.tar.xz
framework-894f35c9be404bc2c13f90f4a6184a545029181a.zip
Allows user to set number of threads used by openMP in C library grad… (#476)
* Allows user to set number of threads used by openMP in C library gradient operator. Changed to release build flags * closes #477 * added test function for c lib thread deployment * improved thread scaling for neumann algoritims * removed unnecessary thread sync * reverts omp number of threads at the end of the c function call Co-authored-by: Edoardo Pasca <edo.paskino@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/Core/CMakeLists.txt1
-rw-r--r--src/Core/FiniteDifferenceLibrary.c281
2 files changed, 82 insertions, 200 deletions
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt
index 741b985..e828fe5 100644
--- a/src/Core/CMakeLists.txt
+++ b/src/Core/CMakeLists.txt
@@ -1,5 +1,6 @@
set (CMAKE_C_STANDARD 11)
+set (CMAKE_BUILD_TYPE Release)
if(APPLE)
if(NOT DEFINED OPENMP_INCLUDES OR NOT DEFINED OPENMP_LIBRARIES)
diff --git a/src/Core/FiniteDifferenceLibrary.c b/src/Core/FiniteDifferenceLibrary.c
index f5736e2..fbf2646 100644
--- a/src/Core/FiniteDifferenceLibrary.c
+++ b/src/Core/FiniteDifferenceLibrary.c
@@ -2,161 +2,66 @@
#include "FiniteDifferenceLibrary.h"
-DLL_EXPORT int openMPtest(void)
+DLL_EXPORT int openMPtest(int nThreads)
{
- int nThreads;
+ omp_set_num_threads(nThreads);
+
+ int nThreads_running;
#pragma omp parallel
{
if (omp_get_thread_num() == 0)
{
- nThreads = omp_get_num_threads();
+ nThreads_running = omp_get_num_threads();
}
}
- return nThreads;
+ return nThreads_running;
}
-int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+void threads_setup(int nThreads_requested, int *nThreads_current)
{
- size_t volume = nx * ny * nz;
-
- const float * inimage = inimagefull;
- float * outimageX = outimageXfull;
- float * outimageY = outimageYfull;
- float * outimageZ = outimageZfull;
-
- int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
- int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
-
- long c;
- for (c = 0; c < nc; c++)
- {
#pragma omp parallel
- {
- long ind, k, j, i;
- float pix0;
- //run over all and then fix boundaries
-#pragma omp for
- for (ind = 0; ind < nx * ny * (nz - 1); ind++)
- {
- pix0 = -inimage[ind];
-
- outimageX[ind] = pix0 + inimage[ind + 1];
- outimageY[ind] = pix0 + inimage[ind + nx];
- outimageZ[ind] = pix0 + inimage[ind + nx * ny];
- }
-
-#pragma omp for
- for (ind = 0; ind < nx * (ny - 1); ind++)
- {
- pix0 = -inimage[ind + offset1];
-
- outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1];
- outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx];
- }
-
-#pragma omp for
- for (ind = 0; ind < nx - 1; ind++)
- {
- pix0 = -inimage[ind + offset2];
-
- outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
- }
-
- //boundaries
-#pragma omp for
- for (k = 0; k < nz; k++)
- {
- for (i = 0; i < nx; i++)
- {
- outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0;
- }
- }
-
-#pragma omp for
- for (k = 0; k < nz; k++)
- {
- for (j = 0; j < ny; j++)
- {
- outimageX[k * ny * nx + j * nx + nx - 1] = 0;
- }
- }
-
- if (nz > 1)
- {
-#pragma omp for
- for (ind = 0; ind < ny * nx; ind++)
- {
- outimageZ[nx * ny * (nz - 1) + ind] = 0;
- }
- }
- }
-
- inimage += volume;
- outimageX += volume;
- outimageY += volume;
- outimageZ += volume;
- }
-
-
- //now the rest of the channels
- if (nc > 1)
{
- long ind;
-
- for (c = 0; c < nc - 1; c++)
- {
- float * outimageC = outimageCfull + c * volume;
- const float * inimage = inimagefull + c * volume;
-
-#pragma omp parallel for
- for (ind = 0; ind < volume; ind++)
- {
- outimageC[ind] = -inimage[ind] + inimage[ind + volume];
- }
- }
-
-#pragma omp parallel for
- for (ind = 0; ind < volume; ind++)
+ if (omp_get_thread_num() == 0)
{
- outimageCfull[(nc - 1) * volume + ind] = 0;
+ *nThreads_current = omp_get_num_threads();
}
}
-
- return 0;
+ omp_set_num_threads(nThreads_requested);
}
-int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normfull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
+
+int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc)
{
size_t volume = nx * ny * nz;
- int z_dim = nz - 1;
-
- const float * inimage = inimagefull;
- float * outimageX = outimageXfull;
- float * outimageY = outimageYfull;
- float * outimageZ = outimageZfull;
- float * outimageL21norm = outimageL21normfull;
+ const float *inimage = inimagefull;
+ float *outimageX = outimageXfull;
+ float *outimageY = outimageYfull;
+ float *outimageZ = outimageZfull;
- int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+ int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
long c;
+
+ int z_dim = nz > 1 ? 1: 0;
+
for (c = 0; c < nc; c++)
{
#pragma omp parallel
{
- long ind, k;
+ long ind, k, j, i;
float pix0;
//run over all and then fix boundaries
-#pragma omp for
+
+#pragma omp for nowait
for (ind = 0; ind < nx * ny * (nz - 1); ind++)
{
pix0 = -inimage[ind];
-
outimageX[ind] = pix0 + inimage[ind + 1];
outimageY[ind] = pix0 + inimage[ind + nx];
outimageZ[ind] = pix0 + inimage[ind + nx * ny];
}
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < nx * (ny - 1); ind++)
{
pix0 = -inimage[ind + offset1];
@@ -169,29 +74,26 @@ int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normful
for (ind = 0; ind < nx - 1; ind++)
{
pix0 = -inimage[ind + offset2];
-
outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1];
}
//boundaries
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
- for (int i = 0; i < nx; i++)
+ for (i = 0; i < nx; i++)
{
outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0;
}
}
-
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
- for (int j = 0; j < ny; j++)
+ for (j = 0; j < ny; j++)
{
outimageX[k * ny * nx + j * nx + nx - 1] = 0;
}
}
-
if (z_dim)
{
#pragma omp for
@@ -199,34 +101,15 @@ int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normful
{
outimageZ[nx * ny * (nz - 1) + ind] = 0;
}
-
-#pragma omp for
- for (ind = 0; ind < volume; ind++)
- {
- outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind] + outimageZ[ind] * outimageZ[ind];
- }
-
- }
- else
- {
-
-#pragma omp for
- for (ind = 0; ind < volume; ind++)
- {
- outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind];
- }
}
}
-
inimage += volume;
outimageX += volume;
outimageY += volume;
outimageZ += volume;
- outimageL21norm += volume;
}
-
//now the rest of the channels
if (nc > 1)
{
@@ -234,18 +117,13 @@ int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normful
for (c = 0; c < nc - 1; c++)
{
- float * outimageC = outimageCfull + c * volume;
- float * outimageL21norm = outimageL21normfull + c * volume;
- const float * inimage = inimagefull + c * volume;
+ float *outimageC = outimageCfull + c * volume;
+ const float *inimage = inimagefull + c * volume;
#pragma omp parallel for
for (ind = 0; ind < volume; ind++)
{
outimageC[ind] = -inimage[ind] + inimage[ind + volume];
- outimageL21norm[ind] += outimageC[ind] * outimageC[ind];
-
- //sqrt
- outimageL21norm[ind] = (float)sqrt(outimageL21norm[ind]);
}
}
@@ -262,15 +140,14 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
{
size_t volume = nx * ny * nz;
- const float * inimage = inimagefull;
- float * outimageX = outimageXfull;
- float * outimageY = outimageYfull;
- float * outimageZ = outimageZfull;
+ const float *inimage = inimagefull;
+ float *outimageX = outimageXfull;
+ float *outimageY = outimageYfull;
+ float *outimageZ = outimageZfull;
- int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
+ int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice
int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row
-
long c;
for (c = 0; c < nc; c++)
{
@@ -280,7 +157,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
long ind, k;
float pix0;
//run over all and then fix boundaries
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < nx * ny * (nz - 1); ind++)
{
pix0 = -inimage[ind];
@@ -290,7 +167,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
outimageZ[ind] = pix0 + inimage[ind + nx * ny];
}
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < nx * (ny - 1); ind++)
{
pix0 = -inimage[ind + offset1];
@@ -308,7 +185,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
}
//boundaries
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
for (int i = 0; i < nx; i++)
@@ -320,7 +197,7 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
}
}
-#pragma omp for
+#pragma omp for nowait
for (k = 0; k < nz; k++)
{
for (int j = 0; j < ny; j++)
@@ -332,10 +209,9 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
}
}
-
if (nz > 1)
{
-#pragma omp for
+#pragma omp for nowait
for (ind = 0; ind < ny * nx; ind++)
{
outimageZ[nx * ny * (nz - 1) + ind] = -inimage[nx * ny * (nz - 1) + ind] + inimage[ind];
@@ -356,8 +232,8 @@ int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float
for (c = 0; c < nc - 1; c++)
{
- float * outimageC = outimageCfull + c * volume;
- const float * inimage = inimagefull + c * volume;
+ float *outimageC = outimageCfull + c * volume;
+ const float *inimage = inimagefull + c * volume;
#pragma omp parallel for
for (ind = 0; ind < volume; ind++)
@@ -383,18 +259,18 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
//assumes nx and ny > 1
int z_dim = nz - 1;
- float * outimage = outimagefull;
- const float * inimageX = inimageXfull;
- const float * inimageY = inimageYfull;
- const float * inimageZ = inimageZfull;
+ float *outimage = outimagefull;
+ const float *inimageX = inimageXfull;
+ const float *inimageY = inimageYfull;
+ const float *inimageZ = inimageZfull;
- float * tempX = (float*)malloc(volume * sizeof(float));
- float * tempY = (float*)malloc(volume * sizeof(float));
- float * tempZ;
-
- if(z_dim)
+ float *tempX = (float *)malloc(volume * sizeof(float));
+ float *tempY = (float *)malloc(volume * sizeof(float));
+ float *tempZ;
+
+ if (z_dim)
{
- tempZ = (float*)malloc(volume * sizeof(float));
+ tempZ = (float *)malloc(volume * sizeof(float));
}
long c;
@@ -413,7 +289,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
for (ind = nx; ind < nx * ny * nz; ind++)
{
tempY[ind] = -inimageY[ind] + inimageY[ind - nx];
-
}
//boundaries
@@ -442,7 +317,7 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
for (ind = nx * ny; ind < nx * ny * nz; ind++)
{
tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny];
- }
+ }
#pragma omp for
for (ind = 0; ind < ny * nx; ind++)
{
@@ -454,7 +329,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
{
outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind];
}
-
}
else
{
@@ -464,7 +338,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
outimage[ind] = tempX[ind] + tempY[ind];
}
}
-
}
outimage += volume;
@@ -475,10 +348,9 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
free(tempX);
free(tempY);
- if(z_dim)
+ if (z_dim)
free(tempZ);
-
// //now the rest of the channels
if (nc > 1)
{
@@ -500,7 +372,6 @@ int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const
outimagefull[ind] += -inimageCfull[ind];
outimagefull[(nc - 1) * volume + ind] += inimageCfull[(nc - 2) * volume + ind];
}
-
}
return 0;
@@ -513,18 +384,18 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
//assumes nx and ny > 1
int z_dim = nz - 1;
- float * outimage = outimagefull;
- const float * inimageX = inimageXfull;
- const float * inimageY = inimageYfull;
- const float * inimageZ = inimageZfull;
+ float *outimage = outimagefull;
+ const float *inimageX = inimageXfull;
+ const float *inimageY = inimageYfull;
+ const float *inimageZ = inimageZfull;
- float * tempX = (float*)malloc(volume * sizeof(float));
- float * tempY = (float*)malloc(volume * sizeof(float));
- float * tempZ;
+ float *tempX = (float *)malloc(volume * sizeof(float));
+ float *tempY = (float *)malloc(volume * sizeof(float));
+ float *tempZ;
if (z_dim)
{
- tempZ = (float*)malloc(volume * sizeof(float));
+ tempZ = (float *)malloc(volume * sizeof(float));
}
long c;
@@ -583,7 +454,6 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
{
outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind];
}
-
}
else
{
@@ -593,7 +463,6 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
outimage[ind] = tempX[ind] + tempY[ind];
}
}
-
}
outimage += volume;
@@ -604,7 +473,7 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
free(tempX);
free(tempY);
- if(z_dim)
+ if (z_dim)
free(tempZ);
//now the rest of the channels
@@ -632,9 +501,11 @@ int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const
return 0;
}
-
-DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction)
+DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction, int nThreads)
{
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
+
if (boundary)
{
if (direction)
@@ -643,17 +514,21 @@ DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, flo
fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
}
else
- {
+ {
if (direction)
fdiff_direct_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
else
fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc);
}
-
+
+ omp_set_num_threads(nThreads_initial);
return 0;
}
-DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction)
+DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction, int nThreads)
{
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
+
if (boundary)
{
if (direction)
@@ -669,10 +544,14 @@ DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, flo
fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1);
}
+ omp_set_num_threads(nThreads_initial);
return 0;
}
-DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction)
+DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction, int nThreads)
{
+ int nThreads_initial;
+ threads_setup(nThreads, &nThreads_initial);
+
if (boundary)
{
if (direction)
@@ -688,5 +567,7 @@ DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, lon
fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1);
}
+ omp_set_num_threads(nThreads_initial);
return 0;
}
+