diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Core/CMakeLists.txt | 124 | ||||
-rw-r--r-- | src/Core/FiniteDifferenceLibrary.c | 692 | ||||
-rwxr-xr-x | src/Core/axpby.c | 109 | ||||
-rw-r--r-- | src/Core/include/FiniteDifferenceLibrary.h | 6 | ||||
-rw-r--r-- | src/Core/include/axpby.h | 17 | ||||
-rwxr-xr-x | src/Core/include/dll_export.h | 20 |
6 files changed, 968 insertions, 0 deletions
diff --git a/src/Core/CMakeLists.txt b/src/Core/CMakeLists.txt new file mode 100644 index 0000000..a527289 --- /dev/null +++ b/src/Core/CMakeLists.txt @@ -0,0 +1,124 @@ + +set (CMAKE_C_STANDARD 11) + +if(APPLE) + if(NOT DEFINED OPENMP_INCLUDES OR NOT DEFINED OPENMP_LIBRARIES) + set(OPENMP_INCLUDES "OPENMP_INCLUDES-NOTFOUND" CACHE PATH "Need to set OpenMP include dir on OSX") + set(OPENMP_LIBRARIES "OPENMP_LIBRARIES-NOTFOUND" CACHE PATH "Need to set OpenMP lib dir on OSX") + endif() + if(NOT EXISTS ${OPENMP_INCLUDES} OR NOT EXISTS ${OPENMP_LIBRARIES}) + message(FATAL_ERROR "Need to set OPENMP_INCLUDES and OPENMP_LIBRARIES on OSX.") + endif() + if(CMAKE_C_COMPILER_ID MATCHES "Clang") + set(OpenMP_C "${CMAKE_C_COMPILER}") + set(OpenMP_C_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument") + set(OpenMP_C_LIB_NAMES "libomp" "libgomp" "libiomp5") + set(OpenMP_libomp_LIBRARY ${OpenMP_C_LIB_NAMES}) + set(OpenMP_libgomp_LIBRARY ${OpenMP_C_LIB_NAMES}) + set(OpenMP_libiomp5_LIBRARY ${OpenMP_C_LIB_NAMES}) + endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(OpenMP_CXX "${CMAKE_CXX_COMPILER}") + set(OpenMP_CXX_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument") + set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5") + set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES}) + set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES}) + set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES}) + endif() +endif() + + +find_package(OpenMP REQUIRED) + +add_definitions(${OpenMP_CXX_FLAGS}) +add_definitions(${OpenMP_C_FLAGS}) + + if (APPLE) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}") + set (CMAKE_STATIC_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_STATIC_LINKER_FLAGS}") + include_directories("${OPENMP_INCLUDES}") + link_directories("${OPENMP_LIBRARIES}") + else() + if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.9.0") + set (OpenMP_EXE_LINKER_FLAGS OpenMP::OpenMP_CXX) + set (OpenMP_EXE_LINKER_FLAGS OpenMP::OpenMP_C) + else() + message(WARNING "Your CMake version is old. OpenMP linking flags might be incorrect.") + # need to explicitly set this. Definitely for gcc, hopefully also for other systems. + # See https://gitlab.kitware.com/cmake/cmake/issues/15392 + set (OpenMP_EXE_LINKER_FLAGS ${OpenMP_CXX_FLAGS}) + set (OpenMP_EXE_LINKER_FLAGS ${OpenMP_C_FLAGS}) + endif() + endif() + + +#if (OPENMP_FOUND) +# set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +# set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}") +# set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}") +# set (CMAKE_STATIC_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_STATIC_LINKER_FLAGS} ${OpenMP_CXX_FLAGS}") +# if (UNIX) +# set (FLAGS "-O3 -funsigned-char -Wall -Wl,--no-undefined -march=native") +# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}") +# set (CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} ${FLAGS}") +# +# set (EXTRA_LIBRARIES +# "gomp" +# "m" +# ) +# endif() +# endif() + +if (WIN32) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ddll_EXPORTS") +endif() + + +message("CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}") +message("CMAKE_C_FLAGS ${CMAKE_C_FLAGS}") +message("CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}") +message("CMAKE_SHARED_LINKER_FLAGS ${CMAKE_SHARED_LINKER_FLAGS}") +message("CMAKE_STATIC_LINKER_FLAGS ${CMAKE_STATIC_LINKER_FLAGS}") + + + +add_library(cilacc SHARED ${CMAKE_CURRENT_SOURCE_DIR}/axpby.c + ${CMAKE_CURRENT_SOURCE_DIR}/FiniteDifferenceLibrary.c ) + +target_link_libraries(cilacc ${OpenMP_C_LIB_NAMES} ) +include_directories(cilacc PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +## Install +#include(GNUInstallDirs) +#install(TARGETS cilacc +# RUNTIME DESTINATION bin +# LIBRARY DESTINATION lib +# ARCHIVE DESTINATION lib +# CONFIGURATIONS ${CMAKE_BUILD_TYPE} +# ) + +if (UNIX) +message ("I'd install into ${CMAKE_INSTALL_PREFIX}/lib") +install(TARGETS cilacc + LIBRARY DESTINATION lib + CONFIGURATIONS ${CMAKE_BUILD_TYPE} + ) +elseif(WIN32) +message ("I'd install into ${CMAKE_INSTALL_PREFIX} lib bin") + install(TARGETS cilacc + RUNTIME DESTINATION bin + ARCHIVE DESTINATION lib + CONFIGURATIONS ${CMAKE_BUILD_TYPE} + ) +endif() + +install(DIRECTORY ${PROJECT_SOURCE_DIR}/src/Core/include/ + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/cil) + + diff --git a/src/Core/FiniteDifferenceLibrary.c b/src/Core/FiniteDifferenceLibrary.c new file mode 100644 index 0000000..f5736e2 --- /dev/null +++ b/src/Core/FiniteDifferenceLibrary.c @@ -0,0 +1,692 @@ +#define dll_EXPORTS = 1 + +#include "FiniteDifferenceLibrary.h" + +DLL_EXPORT int openMPtest(void) +{ + int nThreads; +#pragma omp parallel + { + if (omp_get_thread_num() == 0) + { + nThreads = omp_get_num_threads(); + } + } + return nThreads; +} +int fdiff_direct_neumann(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc) +{ + size_t volume = nx * ny * nz; + + const float * inimage = inimagefull; + float * outimageX = outimageXfull; + float * outimageY = outimageYfull; + float * outimageZ = outimageZfull; + + int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice + int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row + + long c; + for (c = 0; c < nc; c++) + { +#pragma omp parallel + { + long ind, k, j, i; + float pix0; + //run over all and then fix boundaries +#pragma omp for + for (ind = 0; ind < nx * ny * (nz - 1); ind++) + { + pix0 = -inimage[ind]; + + outimageX[ind] = pix0 + inimage[ind + 1]; + outimageY[ind] = pix0 + inimage[ind + nx]; + outimageZ[ind] = pix0 + inimage[ind + nx * ny]; + } + +#pragma omp for + for (ind = 0; ind < nx * (ny - 1); ind++) + { + pix0 = -inimage[ind + offset1]; + + outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1]; + outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx]; + } + +#pragma omp for + for (ind = 0; ind < nx - 1; ind++) + { + pix0 = -inimage[ind + offset2]; + + outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1]; + } + + //boundaries +#pragma omp for + for (k = 0; k < nz; k++) + { + for (i = 0; i < nx; i++) + { + outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0; + } + } + +#pragma omp for + for (k = 0; k < nz; k++) + { + for (j = 0; j < ny; j++) + { + outimageX[k * ny * nx + j * nx + nx - 1] = 0; + } + } + + if (nz > 1) + { +#pragma omp for + for (ind = 0; ind < ny * nx; ind++) + { + outimageZ[nx * ny * (nz - 1) + ind] = 0; + } + } + } + + inimage += volume; + outimageX += volume; + outimageY += volume; + outimageZ += volume; + } + + + //now the rest of the channels + if (nc > 1) + { + long ind; + + for (c = 0; c < nc - 1; c++) + { + float * outimageC = outimageCfull + c * volume; + const float * inimage = inimagefull + c * volume; + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimageC[ind] = -inimage[ind] + inimage[ind + volume]; + } + } + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimageCfull[(nc - 1) * volume + ind] = 0; + } + } + + return 0; +} +int fdiff_direct_neumann_L21(const float *inimagefull, float *outimageL21normfull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc) +{ + size_t volume = nx * ny * nz; + int z_dim = nz - 1; + + const float * inimage = inimagefull; + float * outimageX = outimageXfull; + float * outimageY = outimageYfull; + float * outimageZ = outimageZfull; + float * outimageL21norm = outimageL21normfull; + + + int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice + int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row + + long c; + for (c = 0; c < nc; c++) + { +#pragma omp parallel + { + long ind, k; + float pix0; + //run over all and then fix boundaries +#pragma omp for + for (ind = 0; ind < nx * ny * (nz - 1); ind++) + { + pix0 = -inimage[ind]; + + outimageX[ind] = pix0 + inimage[ind + 1]; + outimageY[ind] = pix0 + inimage[ind + nx]; + outimageZ[ind] = pix0 + inimage[ind + nx * ny]; + } + +#pragma omp for + for (ind = 0; ind < nx * (ny - 1); ind++) + { + pix0 = -inimage[ind + offset1]; + + outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1]; + outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx]; + } + +#pragma omp for + for (ind = 0; ind < nx - 1; ind++) + { + pix0 = -inimage[ind + offset2]; + + outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1]; + } + + //boundaries +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int i = 0; i < nx; i++) + { + outimageY[(k * ny * nx) + (ny - 1) * nx + i] = 0; + } + } + +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int j = 0; j < ny; j++) + { + outimageX[k * ny * nx + j * nx + nx - 1] = 0; + } + } + + if (z_dim) + { +#pragma omp for + for (ind = 0; ind < ny * nx; ind++) + { + outimageZ[nx * ny * (nz - 1) + ind] = 0; + } + +#pragma omp for + for (ind = 0; ind < volume; ind++) + { + outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind] + outimageZ[ind] * outimageZ[ind]; + } + + } + else + { + +#pragma omp for + for (ind = 0; ind < volume; ind++) + { + outimageL21norm[ind] = outimageX[ind] * outimageX[ind] + outimageY[ind] * outimageY[ind]; + } + } + } + + + inimage += volume; + outimageX += volume; + outimageY += volume; + outimageZ += volume; + outimageL21norm += volume; + } + + + //now the rest of the channels + if (nc > 1) + { + long ind; + + for (c = 0; c < nc - 1; c++) + { + float * outimageC = outimageCfull + c * volume; + float * outimageL21norm = outimageL21normfull + c * volume; + const float * inimage = inimagefull + c * volume; + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimageC[ind] = -inimage[ind] + inimage[ind + volume]; + outimageL21norm[ind] += outimageC[ind] * outimageC[ind]; + + //sqrt + outimageL21norm[ind] = (float)sqrt(outimageL21norm[ind]); + } + } + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimageCfull[(nc - 1) * volume + ind] = 0; + } + } + + return 0; +} +int fdiff_direct_periodic(const float *inimagefull, float *outimageXfull, float *outimageYfull, float *outimageZfull, float *outimageCfull, long nx, long ny, long nz, long nc) +{ + size_t volume = nx * ny * nz; + + const float * inimage = inimagefull; + float * outimageX = outimageXfull; + float * outimageY = outimageYfull; + float * outimageZ = outimageZfull; + + int offset1 = (nz - 1) * nx * ny; //ind to beginning of last slice + int offset2 = offset1 + (ny - 1) * nx; //ind to beginning of last row + + + long c; + for (c = 0; c < nc; c++) + { + +#pragma omp parallel + { + long ind, k; + float pix0; + //run over all and then fix boundaries +#pragma omp for + for (ind = 0; ind < nx * ny * (nz - 1); ind++) + { + pix0 = -inimage[ind]; + + outimageX[ind] = pix0 + inimage[ind + 1]; + outimageY[ind] = pix0 + inimage[ind + nx]; + outimageZ[ind] = pix0 + inimage[ind + nx * ny]; + } + +#pragma omp for + for (ind = 0; ind < nx * (ny - 1); ind++) + { + pix0 = -inimage[ind + offset1]; + + outimageX[ind + offset1] = pix0 + inimage[ind + offset1 + 1]; + outimageY[ind + offset1] = pix0 + inimage[ind + offset1 + nx]; + } + +#pragma omp for + for (ind = 0; ind < nx - 1; ind++) + { + pix0 = -inimage[ind + offset2]; + + outimageX[ind + offset2] = pix0 + inimage[ind + offset2 + 1]; + } + + //boundaries +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int i = 0; i < nx; i++) + { + int ind1 = (k * ny * nx); + int ind2 = ind1 + (ny - 1) * nx; + + outimageY[ind2 + i] = -inimage[ind2 + i] + inimage[ind1 + i]; + } + } + +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int j = 0; j < ny; j++) + { + int ind1 = k * ny * nx + j * nx; + int ind2 = ind1 + nx - 1; + + outimageX[ind2] = -inimage[ind2] + inimage[ind1]; + } + } + + + if (nz > 1) + { +#pragma omp for + for (ind = 0; ind < ny * nx; ind++) + { + outimageZ[nx * ny * (nz - 1) + ind] = -inimage[nx * ny * (nz - 1) + ind] + inimage[ind]; + } + } + } + + inimage += volume; + outimageX += volume; + outimageY += volume; + outimageZ += volume; + } + + //now the rest of the channels + if (nc > 1) + { + long ind; + + for (c = 0; c < nc - 1; c++) + { + float * outimageC = outimageCfull + c * volume; + const float * inimage = inimagefull + c * volume; + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimageC[ind] = -inimage[ind] + inimage[ind + volume]; + } + } + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimageCfull[(nc - 1) * volume + ind] = -inimagefull[(nc - 1) * volume + ind] + inimagefull[ind]; + } + } + + return 0; +} +int fdiff_adjoint_neumann(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc) +{ + //runs over full data in x, y, z. then corrects elements for bounday conditions and sums + size_t volume = nx * ny * nz; + + //assumes nx and ny > 1 + int z_dim = nz - 1; + + float * outimage = outimagefull; + const float * inimageX = inimageXfull; + const float * inimageY = inimageYfull; + const float * inimageZ = inimageZfull; + + float * tempX = (float*)malloc(volume * sizeof(float)); + float * tempY = (float*)malloc(volume * sizeof(float)); + float * tempZ; + + if(z_dim) + { + tempZ = (float*)malloc(volume * sizeof(float)); + } + + long c; + for (c = 0; c < nc; c++) //just calculating x, y and z in each channel here + { +#pragma omp parallel + { + long ind, k; + +#pragma omp for + for (ind = 1; ind < nx * ny * nz; ind++) + { + tempX[ind] = -inimageX[ind] + inimageX[ind - 1]; + } +#pragma omp for + for (ind = nx; ind < nx * ny * nz; ind++) + { + tempY[ind] = -inimageY[ind] + inimageY[ind - nx]; + + } + + //boundaries +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int j = 0; j < ny; j++) + { + tempX[k * ny * nx + j * nx] = -inimageX[k * ny * nx + j * nx]; + tempX[k * ny * nx + j * nx + nx - 1] = inimageX[k * ny * nx + j * nx + nx - 2]; + } + } +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int i = 0; i < nx; i++) + { + tempY[(k * ny * nx) + i] = -inimageY[(k * ny * nx) + i]; + tempY[(k * ny * nx) + nx * (ny - 1) + i] = inimageY[(k * ny * nx) + nx * (ny - 2) + i]; + } + } + + if (z_dim) + { +#pragma omp for + for (ind = nx * ny; ind < nx * ny * nz; ind++) + { + tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny]; + } +#pragma omp for + for (ind = 0; ind < ny * nx; ind++) + { + tempZ[ind] = -inimageZ[ind]; + tempZ[nx * ny * (nz - 1) + ind] = inimageZ[nx * ny * (nz - 2) + ind]; + } +#pragma omp for + for (ind = 0; ind < volume; ind++) + { + outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind]; + } + + } + else + { +#pragma omp for + for (ind = 0; ind < volume; ind++) + { + outimage[ind] = tempX[ind] + tempY[ind]; + } + } + + } + + outimage += volume; + inimageX += volume; + inimageY += volume; + inimageZ += volume; + } + free(tempX); + free(tempY); + + if(z_dim) + free(tempZ); + + + // //now the rest of the channels + if (nc > 1) + { + long ind; + + for (c = 1; c < nc - 1; c++) + { + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimagefull[ind + c * volume] += -inimageCfull[ind + c * volume] + inimageCfull[ind + (c - 1) * volume]; + } + } + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimagefull[ind] += -inimageCfull[ind]; + outimagefull[(nc - 1) * volume + ind] += inimageCfull[(nc - 2) * volume + ind]; + } + + } + + return 0; +} +int fdiff_adjoint_periodic(float *outimagefull, const float *inimageXfull, const float *inimageYfull, const float *inimageZfull, const float *inimageCfull, long nx, long ny, long nz, long nc) +{ + //runs over full data in x, y, z. then correctects elements for bounday conditions and sums + size_t volume = nx * ny * nz; + + //assumes nx and ny > 1 + int z_dim = nz - 1; + + float * outimage = outimagefull; + const float * inimageX = inimageXfull; + const float * inimageY = inimageYfull; + const float * inimageZ = inimageZfull; + + float * tempX = (float*)malloc(volume * sizeof(float)); + float * tempY = (float*)malloc(volume * sizeof(float)); + float * tempZ; + + if (z_dim) + { + tempZ = (float*)malloc(volume * sizeof(float)); + } + + long c; + for (c = 0; c < nc; c++) //just calculating x, y and z in each channel here + { +#pragma omp parallel + { + long ind, k; + + //run over all and then fix boundaries +#pragma omp for + for (ind = 1; ind < volume; ind++) + { + tempX[ind] = -inimageX[ind] + inimageX[ind - 1]; + } +#pragma omp for + for (ind = nx; ind < volume; ind++) + { + tempY[ind] = -inimageY[ind] + inimageY[ind - nx]; + } + + //boundaries +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int i = 0; i < nx; i++) + { + tempY[(k * ny * nx) + i] = -inimageY[(k * ny * nx) + i] + inimageY[(k * ny * nx) + nx * (ny - 1) + i]; + } + } +#pragma omp for + for (k = 0; k < nz; k++) + { + for (int j = 0; j < ny; j++) + { + tempX[k * ny * nx + j * nx] = -inimageX[k * ny * nx + j * nx] + inimageX[k * ny * nx + j * nx + nx - 1]; + } + } + + if (z_dim) + { + +#pragma omp for + for (ind = nx * ny; ind < nx * ny * nz; ind++) + { + tempZ[ind] = -inimageZ[ind] + inimageZ[ind - nx * ny]; + } +#pragma omp for + for (ind = 0; ind < ny * nx; ind++) + { + tempZ[ind] = -inimageZ[ind] + inimageZ[nx * ny * (nz - 1) + ind]; + } + +#pragma omp for + for (ind = 0; ind < volume; ind++) + { + outimage[ind] = tempX[ind] + tempY[ind] + tempZ[ind]; + } + + } + else + { +#pragma omp for + for (ind = 0; ind < volume; ind++) + { + outimage[ind] = tempX[ind] + tempY[ind]; + } + } + + } + + outimage += volume; + inimageX += volume; + inimageY += volume; + inimageZ += volume; + } + free(tempX); + free(tempY); + + if(z_dim) + free(tempZ); + + //now the rest of the channels + if (nc > 1) + { + long ind; + + for (c = 1; c < nc; c++) + { + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimagefull[ind + c * volume] += -inimageCfull[ind + c * volume] + inimageCfull[ind + (c - 1) * volume]; + } + } + +#pragma omp parallel for + for (ind = 0; ind < volume; ind++) + { + outimagefull[ind] += -inimageCfull[ind] + inimageCfull[(nc - 1) * volume + ind]; + } + } + + return 0; +} + + +DLL_EXPORT int fdiff4D(float *imagefull, float *gradCfull, float *gradZfull, float *gradYfull, float *gradXfull, long nc, long nz, long ny, long nx, int boundary, int direction) +{ + if (boundary) + { + if (direction) + fdiff_direct_periodic(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc); + else + fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc); + } + else + { + if (direction) + fdiff_direct_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc); + else + fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, gradCfull, nx, ny, nz, nc); + } + + return 0; +} +DLL_EXPORT int fdiff3D(float *imagefull, float *gradZfull, float *gradYfull, float *gradXfull, long nz, long ny, long nx, int boundary, int direction) +{ + if (boundary) + { + if (direction) + fdiff_direct_periodic(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1); + else + fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1); + } + else + { + if (direction) + fdiff_direct_neumann(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1); + else + fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, gradZfull, NULL, nx, ny, nz, 1); + } + + return 0; +} +DLL_EXPORT int fdiff2D(float *imagefull, float *gradYfull, float *gradXfull, long ny, long nx, int boundary, int direction) +{ + if (boundary) + { + if (direction) + fdiff_direct_periodic(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1); + else + fdiff_adjoint_periodic(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1); + } + else + { + if (direction) + fdiff_direct_neumann(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1); + else + fdiff_adjoint_neumann(imagefull, gradXfull, gradYfull, NULL, NULL, nx, ny, 1, 1); + } + + return 0; +} diff --git a/src/Core/axpby.c b/src/Core/axpby.c new file mode 100755 index 0000000..c4d162d --- /dev/null +++ b/src/Core/axpby.c @@ -0,0 +1,109 @@ +#include "axpby.h" + + +DLL_EXPORT int padd(float * x, float * y, float * out, long size){ + long i = 0; +#pragma omp parallel for + for (i=0; i < size; i++) + { + *(out + i ) = *(x + i) + *(y+i); + } + return 0; +} + +DLL_EXPORT int psubtract(float * x, float * y, float * out, long size){ + long i = 0; +#pragma omp parallel +{ +//#pragma omp single +//{ +// printf("current number of threads %d\n", omp_get_num_threads()); +//} +#pragma omp for + for (i=0; i < size; i++) + { + *(out + i ) = *(x + i) - *(y+i); + } +} + return 0; + +} + +DLL_EXPORT int pmultiply(float * x, float * y, float * out, long size){ + long i = 0; +#pragma omp parallel for + for (i=0; i < size; i++) + { + *(out + i ) = *(x + i) * *(y+i); + } + return 0; +} + +DLL_EXPORT int pdivide(float * x, float * y, float * out, long size, float default_value) +{ + long i = 0; +#pragma omp parallel for + for (i=0; i < size; i++) + { + *(out + i ) = *(y+i) ? *(x + i) / *(y+i) : default_value; + } + return 0; +} +DLL_EXPORT int ppower(float * x, float * y, float * out, long size){ + long i = 0; +#pragma omp parallel for + for (i=0; i < size; i++) + { + *(out + i ) = (float)pow(*(x + i) , *(y+i)) ; + } + return 0; +} + +DLL_EXPORT int pminimum(float * x, float * y, float * out, long size){ + long i = 0; +#pragma omp parallel for + for (i=0; i < size; i++) + { + *(out + i ) = *(y+i) > (*x+i) ? *(x + i) : *(y+i); + } + return 0; +} + +DLL_EXPORT int pmaximum(float * x, float * y, float * out, long size) { + long i = 0; +#pragma omp parallel for + for (i = 0; i < size; i++) + { + *(out + i) = *(y + i) < (*x + i) ? *(x + i) : *(y + i); + } + return 0; +} + + +DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size){ + long i = 0; +#pragma omp parallel +{ +#pragma omp for + for (i=0; i < size; i++) + { + *(out + i ) = a * ( *(x + i) ) + b * ( *(y + i) ); + } +} + return 0; + +} + +DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size) { + long i = 0; +#pragma omp parallel + { +#pragma omp for + for (i = 0; i < size; i++) + { + *(out + i) = a * (*(x + i)) + b * (*(y + i)); + } + } + return 0; + +}
\ No newline at end of file diff --git a/src/Core/include/FiniteDifferenceLibrary.h b/src/Core/include/FiniteDifferenceLibrary.h new file mode 100644 index 0000000..6e426af --- /dev/null +++ b/src/Core/include/FiniteDifferenceLibrary.h @@ -0,0 +1,6 @@ +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include "omp.h" +//#include "ipp.h" +#include "dll_export.h"
\ No newline at end of file diff --git a/src/Core/include/axpby.h b/src/Core/include/axpby.h new file mode 100644 index 0000000..2849547 --- /dev/null +++ b/src/Core/include/axpby.h @@ -0,0 +1,17 @@ +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include "omp.h" +#include "dll_export.h" + + +DLL_EXPORT int padd(float * x, float * y, float * out, long size); +DLL_EXPORT int psubtract(float * x, float * y, float * out, long size); +DLL_EXPORT int pmultiply(float * x, float * y, float * out, long size); +DLL_EXPORT int pdivide(float * x, float * y, float * out, long size, float default_value); +DLL_EXPORT int ppower(float * x, float * y, float * out, long size); +DLL_EXPORT int pminimum(float * x, float * y, float * out, long size); +DLL_EXPORT int pmaximum(float * x, float * y, float * out, long size); + +DLL_EXPORT int saxpby(float * x, float * y, float * out, float a, float b, long size); +DLL_EXPORT int daxpby(double * x, double * y, double * out, double a, double b, long size); diff --git a/src/Core/include/dll_export.h b/src/Core/include/dll_export.h new file mode 100755 index 0000000..6b816c3 --- /dev/null +++ b/src/Core/include/dll_export.h @@ -0,0 +1,20 @@ +#pragma once +#ifndef DLLEXPORT_H +#define DLLEXPORT_H + +#if defined(_WIN32) || defined(__WIN32__) +#if defined(dll_EXPORTS) // add by CMake +#define DLL_EXPORT __declspec(dllexport) +#define EXPIMP_TEMPLATE +#else +#define DLL_EXPORT __declspec(dllimport) +#define EXPIMP_TEMPLATE extern +#endif +#elif defined(linux) || defined(__linux) || defined(__APPLE__) +#define DLL_EXPORT +#ifndef __cdecl +#define __cdecl +#endif +#endif + +#endif |