diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a5922..d6a5b12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.17) set(LIBNAME "ImageStreamIO") +set(LIBNAME_PCFILES "ImageStreamIO") set(SRCNAME "ImageStreamIO") message("") @@ -13,24 +14,55 @@ project(${LIBNAME} LANGUAGES C) # Version number set ( VERSION_MAJOR 2 ) -set ( VERSION_MINOR 00 ) +set ( VERSION_MINOR 01 ) set ( VERSION_PATCH 00 ) set ( VERSION_OPTION "" ) set ( PROJECT_VERSION "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" ) configure_file ( - "${PROJECT_SOURCE_DIR}/${PROJECT_NAME}_config.h.in" - "${PROJECT_SOURCE_DIR}//${PROJECT_NAME}_config.h" + "${PROJECT_SOURCE_DIR}/${LIBNAME_PCFILES}_config.h.in" + "${PROJECT_SOURCE_DIR}//${LIBNAME_PCFILES}_config.h" ) message(" VERSION = ${PROJECT_VERSION}") -add_compile_options(-Ofast) +if(USE_CUDA) + set(IPC_REGISTRY_SRC ImageStreamIO_ipc_registry.c) +else() + set(IPC_REGISTRY_SRC "") +endif() -add_library(${LIBNAME} SHARED ${SRCNAME}.c) +add_library(${LIBNAME} SHARED ${SRCNAME}.c ${IPC_REGISTRY_SRC}) # set -C99 flag for 'for' loop initial declartaions set_property(TARGET ${LIBNAME} PROPERTY C_STANDARD 99) +# ======================================= +# BUILD TYPE +# ======================================= +# cmake -DCMAKE_BUILD_TYPE=Debug +# option: add -fanalyzer if gcc10+ (warning: this will take a while to run) +set(CMAKE_C_FLAGS_DEBUG "-O0 -g -Wall -Wextra -std=gnu17 -Wno-sign-compare") +# cmake -DCMAKE_BUILD_TYPE=Coverage +set(CMAKE_C_FLAGS_COVERAGE "-O0 -g --coverage -fprofile-arcs -ftest-coverage") +set(CMAKE_CXX_FLAGS_COVERAGE "-O0 -g --coverage -fprofile-arcs -ftest-coverage") +# cmake -DCMAKE_BUILD_TYPE=Release +set(CMAKE_C_FLAGS_RELEASE "-Ofast -DNDEBUG -std=gnu17") + + +# Set a default build type if none was specified +set(default_build_type "Release") +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Setting build type to '${default_build_type}' as none was specified.") + set(CMAKE_BUILD_TYPE "${default_build_type}") +endif() + +message("CMAKE_BUILD_TYPE : ${CMAKE_BUILD_TYPE}") + + +target_include_directories(${LIBNAME} PUBLIC $ + $ + $) find_package(PkgConfig REQUIRED) + pkg_check_modules(CFITSIO cfitsio) if(${CFITSIO_FOUND}) link_directories(${CFITSIO_LIBRARY_DIRS}) @@ -39,14 +71,10 @@ if(${CFITSIO_FOUND}) target_link_directories(${LIBNAME} PUBLIC ${CFITSIO_INCLUDE_DIRS}) endif() -target_include_directories(${LIBNAME} PUBLIC $ - $ - $) - if(USE_CUDA) - find_package(CUDAToolkit 9.0 REQUIRED) - message(STATUS "Found CUDA ${CUDAToolkit_VERSION} at ${CUDAToolkit_LIBRARY_DIR}") - message("---- CUDA_INCLUDE_DIRS = ${CUDAToolkit_INCLUDE_DIRS}") + find_package(CUDAToolkit 9.0 REQUIRED) + message(STATUS "Found CUDA ${CUDAToolkit_VERSION} at ${CUDAToolkit_LIBRARY_DIR}") + message("---- CUDA_INCLUDE_DIRS = ${CUDAToolkit_INCLUDE_DIRS}") target_include_directories(${LIBNAME} PUBLIC ${CUDAToolkit_INCLUDE_DIRS}) target_link_libraries(${LIBNAME} PRIVATE CUDA::toolkit CUDA::cudart) target_compile_options(${LIBNAME} PUBLIC -DHAVE_CUDA) @@ -54,9 +82,19 @@ if(USE_CUDA) set(BUILD_FLAGS "${BUILD_FLAGS} -DHAVE_CUDA" ) endif(USE_CUDA) +# ======================================= +# COVERAGE BUILD +# ======================================= +if (CMAKE_BUILD_TYPE STREQUAL "Coverage") + target_compile_definitions(${LIBNAME} PRIVATE COVERAGE_BUILD=1) + target_link_options(${LIBNAME} PRIVATE --coverage) + target_link_libraries(${LIBNAME} PRIVATE gcov) +endif() + if(DAO_COMPAT) target_compile_options(${LIBNAME} PUBLIC -DDAO_COMPAT) endif(DAO_COMPAT) + # # Python wrap. # @@ -68,6 +106,7 @@ if(build_python_module) add_subdirectory(python_module) endif() + ######################################################################## # # CONFIG.CMAKE @@ -107,7 +146,6 @@ install(FILES DESTINATION lib/cmake ) - ######################################################################## # # pkg-config @@ -116,10 +154,10 @@ string(APPEND LINKSTRING "-l${LIBNAME} ") set(INSTALL_PKGCONFIG_DIR "lib/pkgconfig" CACHE PATH "Installation directory for pkgconfig (.pc) files") -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${LIBNAME}.pc.in - ${CMAKE_CURRENT_BINARY_DIR}/${LIBNAME}.pc @ONLY) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${LIBNAME_PCFILES}.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/${LIBNAME_PCFILES}.pc @ONLY) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${LIBNAME}.pc +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${LIBNAME_PCFILES}.pc DESTINATION "${INSTALL_PKGCONFIG_DIR}") @@ -141,5 +179,8 @@ endif() install(TARGETS ${LIBNAME} EXPORT ${LIBNAME}Targets DESTINATION lib) -install(FILES ${SRCNAME}.h DESTINATION include/${SRCNAME}) -install(FILES ImageStruct.h ImageStreamIOError.h DESTINATION include/${SRCNAME}) +install(FILES + ${SRCNAME}.h + ImageStruct.h + ImageStreamIOError.h + DESTINATION include/${SRCNAME}) \ No newline at end of file diff --git a/ImageStreamIO.c b/ImageStreamIO.c index a0881b2..e7346c0 100644 --- a/ImageStreamIO.c +++ b/ImageStreamIO.c @@ -87,11 +87,11 @@ errno_t init_ImageStreamIO() // Forward dec'l errno_t ImageStreamIO_printERROR_(const char *file, const char *func, int line, - errno_t code, char *errmessage); + errno_t code, const char *errmessage); errno_t ImageStreamIO_printWARNING(char *warnmessage); errno_t (*internal_printError)(const char *, const char *, int, errno_t, - char *) = &ImageStreamIO_printERROR_; + const char *) = &ImageStreamIO_printERROR_; errno_t ImageStreamIO_set_default_printError() { @@ -101,7 +101,7 @@ errno_t ImageStreamIO_set_default_printError() } errno_t ImageStreamIO_set_printError(errno_t (*new_printError)(const char *, - const char *, int, errno_t, char *)) + const char *, int, errno_t, const char *)) { internal_printError = new_printError; @@ -112,24 +112,42 @@ errno_t ImageStreamIO_set_printError(errno_t (*new_printError)(const char *, if (internal_printError) \ internal_printError(__FILE__, __func__, __LINE__, code, (char*)msg); + #ifdef HAVE_CUDA -void check(cudaError_t result, char const *const func, const char *const file, +int IMAGESTRUCT_COMPILED_HAVE_CUDA = 1; + +cudaError_t check(cudaError_t result, char const *const func, const char *const file, int const line) { if (result) { - cudaDeviceReset(); - // Make sure we call CUDA Device Reset - ImageStreamIO_printERROR_(file, func, line, result, "CUDA error"); + int _errno = errno; // CUDA doesn't use errno, could be a stale errno + errno = 0; + char to_print[100] = ""; + snprintf(to_print, 100, "CUDA error: %s", cudaGetErrorString(result)); + ImageStreamIO_printERROR_(file, func, line, result, to_print); + errno = _errno; // Restore for later use. } + return result; } // This will output the proper CUDA error strings in the event // that a CUDA host call returns an error #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__) -#endif +#include "ImageStreamIO_ipc_registry.h" +#else +int IMAGESTRUCT_COMPILED_HAVE_CUDA = 0; +#endif + +// Technical function for coverage build. +#ifdef COVERAGE_BUILD +extern void __gcov_dump(); // From libgcov, may not exist outside of coverage build +void _gcov_dump() { + __gcov_dump(); +} +#endif /** * @brief Write entry into debug log @@ -182,7 +200,7 @@ errno_t ImageStreamIO_printERROR_( const char *func, int line, __attribute__((unused)) errno_t code, - char *errmessage) + const char *errmessage) { fprintf(stderr, "%c[%d;%dmERROR [ FILE: %s FUNCTION: %s LINE: %d ] %c[%d;m\n", @@ -806,8 +824,10 @@ uint64_t ImageStreamIO_initialize_buffer( cudaMalloc(&image->array.raw, size_element * image->md->nelement + GPU_IMAGE_PLACEHOLDER)); if (image->md->shared == 1) { - checkCudaErrors( + cudaError_t err = checkCudaErrors( cudaIpcGetMemHandle(&image->md->cudaMemHandle, image->array.raw)); + if (err == cudaSuccess) + _isio_ipc_insert(&image->md->cudaMemHandle, image->array.raw); } # else ImageStreamIO_printERROR(IMAGESTREAMIO_NOTIMPL, @@ -934,14 +954,13 @@ errno_t ImageStreamIO_autorelink_if_need_if_can(IMAGE *image) // mmap. This would result in a use-after-free. IMAGE candidate_img = {0}; // stack temp image. - IMAGE* new_candidate_img = &candidate_img; - if (IMAGESTREAMIO_SUCCESS != ImageStreamIO_openIm(new_candidate_img, image->name)) { + if (IMAGESTREAMIO_SUCCESS != ImageStreamIO_openIm(&candidate_img, image->name)) { printf("_openIm failed @ _autorelink_if_need_if_can\n"); return IMAGESTREAMIO_FAILURE; } - if (IMAGESTREAMIO_SUCCESS != ImageStreamIO_new_image_compatible(image, new_candidate_img)) { + if (IMAGESTREAMIO_SUCCESS != ImageStreamIO_new_image_compatible(image, &candidate_img)) { printf("New image incompatible @ _autorelink_if_need_if_can\n"); return IMAGESTREAMIO_FAILURE; } @@ -952,7 +971,7 @@ errno_t ImageStreamIO_autorelink_if_need_if_can(IMAGE *image) printf("_closeIm failed @ _autorelink_if_need_if_can\n"); } // 2. Copy the temp stack frame into the caller struct, including pointers to the new mappings. - memcpy(image, new_candidate_img, sizeof(IMAGE)); + memcpy(image, &candidate_img, sizeof(IMAGE)); return IMAGESTREAMIO_SUCCESS; } @@ -1405,14 +1424,16 @@ errno_t ImageStreamIO_createIm_gpu( } // Shared vs. non-shared logic follows + // Names not used in case of non-shared but the alternance of scopes requires them + // defined here. + char SM_fname[STRINGMAXLEN_FILE_NAME] = {0}; + char SM_fname_tmp[STRINGMAXLEN_FILE_NAME] = {0}; if (shared == 1) { //////////////////////////////////////////////////////////////// // Open and map shmim file of the calculated size image->memsize //////////////////////////////////////////////////////////////// - - char SM_fname[STRINGMAXLEN_FILE_NAME] = {0}; if (IMAGESTREAMIO_SUCCESS != ImageStreamIO_filename(SM_fname, sizeof(SM_fname), name)) { @@ -1428,11 +1449,17 @@ errno_t ImageStreamIO_createIm_gpu( return IMAGESTREAMIO_FILEEXISTS; } + char name_tmp[STRINGMAXLEN_IMAGE_NAME] = {0}; + strcat(name_tmp, name); + strcat(name_tmp, "_tmpcreate"); + if (IMAGESTREAMIO_SUCCESS != ImageStreamIO_filename(SM_fname_tmp, sizeof(SM_fname_tmp), name_tmp)) + return IMAGESTREAMIO_FAILURE; // _filename did _printERROR + // - Create and open shmim file as a new, empty (truncated) file // - image->shmfd stores the shared memory file descriptor umask(0); errno = 0; - image->shmfd = open(SM_fname + image->shmfd = open(SM_fname_tmp // (O_CREAT|O_EXCL) flags force new file , O_RDWR | O_CREAT | O_EXCL | O_TRUNC , (mode_t)FILEMODE_ISIO @@ -1440,9 +1467,9 @@ errno_t ImageStreamIO_createIm_gpu( if (image->shmfd == -1 && errno == EEXIST) { // - File was not created: a file exists at path SM_fname; - unlink(SM_fname); // - unlink that file from its directory; + unlink(SM_fname_tmp); // - unlink that file from its directory; errno = 0; // - ignore any error from unlink; - image->shmfd = open(SM_fname // - and try again ... + image->shmfd = open(SM_fname_tmp // - and try again ... , O_RDWR | O_CREAT | O_EXCL | O_TRUNC , (mode_t)FILEMODE_ISIO ); @@ -1611,13 +1638,58 @@ errno_t ImageStreamIO_createIm_gpu( image->used = 1; image->createcnt++; - // Do this last so shmim cannot be used until it is ready strncpy(image->md->version, IMAGESTRUCT_VERSION, 32); + if (shared == 1) { + // - Atomically move the temp file to the final path + if (rename(SM_fname_tmp, SM_fname) != 0) + { + close(image->shmfd); + ImageStreamIO_printERROR(IMAGESTREAMIO_FILEWRITE, + "Error renaming temp file to final path"); + return IMAGESTREAMIO_FILEWRITE; + } + } return IMAGESTREAMIO_SUCCESS; } // errno_t ImageStreamIO_createIm_gpu(...) +errno_t _destroyIm_unshared_nochecks(IMAGE* image) { + free(image->array.raw); + free(image->kw); + free(image->md); + + image->used = 0; + image->semptr = NULL; + image->md = NULL; + image->kw = NULL; + image->array.raw = NULL; + image->shmfd = 0; + image->memsize = 0; + + return IMAGESTREAMIO_SUCCESS; +} +errno_t _destroyIm_shared_nochecks(IMAGE* image) { + if (image->semptr) + for (int semindex=0; semindexmd->sem; ++semindex) + sem_destroy(image->semptr[semindex]); + if (image->semlog) + sem_destroy(image->semlog); + + char fname[512]; + errno_t ierrno; + if((ierrno = ImageStreamIO_filename(fname, sizeof(fname), image->md->name)) + != IMAGESTREAMIO_SUCCESS) + return ierrno; + + if((ierrno = ImageStreamIO_closeIm(image)) != IMAGESTREAMIO_SUCCESS) + return ierrno; + + if (remove(fname) != 0) + return IMAGESTREAMIO_FAILURE; + + return IMAGESTREAMIO_SUCCESS; +} /** * @brief Unmap and destroy shmim created by ImageStreamIO_createIm_gpu @@ -1625,58 +1697,21 @@ errno_t ImageStreamIO_createIm_gpu( * \returns IMAGESTREAMIO_SUCCESS * */ -errno_t ImageStreamIO_destroyIm( - IMAGE *image) +errno_t ImageStreamIO_destroyIm(IMAGE *image) { - if(image->used == 1) - { - if (image->semptr) - { - for (int semindex=0; semindexmd->sem; ++semindex) - { - sem_destroy(image->semptr[semindex]); - } - free(image->semptr); - image->semptr = NULL; - } - if (image->semlog) - { - sem_destroy(image->semlog); - image->semlog = NULL; - } - - if (image->md->shared != 1) - { - if (image->kw != NULL) - { - free(image->kw); - } - } - image->kw = NULL; - + if(image == NULL) + return IMAGESTREAMIO_INVALIDARG; - if (image->memsize > 0) - { - char fname[512]; - close(image->shmfd); - // Get this before unmapping. - ImageStreamIO_filename(fname, sizeof(fname), image->md->name); - munmap(image->md, image->memsize); - image->md = NULL; - image->kw = NULL; - // Remove the file - remove(fname); - } - else - { - free(image->array.UI8); - } - image->array.UI8 = NULL; + if(image->used == 0) + return IMAGESTREAMIO_SUCCESS; - image->used = 0; - } + if(image->md == NULL) + return IMAGESTREAMIO_INVALIDARG; - return IMAGESTREAMIO_SUCCESS; + if (image->md->shared == 0) + return _destroyIm_unshared_nochecks(image); + else + return _destroyIm_shared_nochecks(image); } @@ -1720,8 +1755,17 @@ void *ImageStreamIO_get_image_d_ptr( { # ifdef HAVE_CUDA checkCudaErrors(cudaSetDevice(image->md->location)); - checkCudaErrors(cudaIpcOpenMemHandle(&d_ptr, image->md->cudaMemHandle, - cudaIpcMemLazyEnablePeerAccess)); + // Check process-local registry first: cudaIpcOpenMemHandle may only be + // called once per handle per process; a second call returns cudaErrorInvalidDeviceContext. + if ((d_ptr = _isio_ipc_lookup(&image->md->cudaMemHandle)) != NULL) { + // Already open in this process: increment refcount, reuse pointer. + _isio_ipc_retain(&image->md->cudaMemHandle); + } else { + checkCudaErrors(cudaIpcOpenMemHandle(&d_ptr, image->md->cudaMemHandle, + cudaIpcMemLazyEnablePeerAccess)); + if (d_ptr != NULL) + _isio_ipc_insert(&image->md->cudaMemHandle, d_ptr); + } # else ImageStreamIO_printERROR(IMAGESTREAMIO_NOTIMPL, "Error calling ImageStreamIO_get_image_d_ptr(), CACAO needs to be " @@ -1840,10 +1884,10 @@ errno_t ImageStreamIO_read_sharedmem_image_toIMAGE( return IMAGESTREAMIO_FAILURE; } - // gain image data array pointer + // gain image data array pointer for GPU SHMs. array.raw was NULL'd by image_sizing if (image->md->location >= 0) { - ImageStreamIO_offset_data(image, image->array.raw); + image->array.raw = ImageStreamIO_get_image_d_ptr(image); } if (image->array.raw == NULL) { @@ -1867,6 +1911,7 @@ errno_t ImageStreamIO_read_sharedmem_image_toIMAGE( image->semptr[semindex] = &image->semfile[semindex].semdata; } + image->used = 1; image->shmfd = SM_fd; return IMAGESTREAMIO_SUCCESS; } // errno_t ImageStreamIO_read_sharedmem_image_toIMAGE(const char *name, IMAGE *image) @@ -1878,20 +1923,51 @@ errno_t ImageStreamIO_read_sharedmem_image_toIMAGE( -errno_t ImageStreamIO_closeIm( - IMAGE *image) +errno_t ImageStreamIO_closeIm(IMAGE *image) { + if(image == NULL) + return IMAGESTREAMIO_INVALIDARG; + + if(image->used == 0) + return IMAGESTREAMIO_SUCCESS; + + if(image->md == NULL) + return IMAGESTREAMIO_INVALIDARG; + + if(image->md->shared == 0) + { + ImageStreamIO_printERROR(IMAGESTREAMIO_INVALIDARG, + "closeIm called on process-local image (shared=0)"); + return IMAGESTREAMIO_INVALIDARG; + } + free(image->semptr); +# ifdef HAVE_CUDA + // Decrement the refcount; cudaIpcCloseMemHandle and slot removal happen + // automatically inside _isio_ipc_release when the count reaches 0. + if (image->md != NULL && image->md->location >= 0) + _isio_ipc_release(&image->md->cudaMemHandle); +# endif + // Close file before unmap, in case unmap fails close(image->shmfd); - + if (munmap(image->md, image->memsize) != 0) { ImageStreamIO_printERROR(IMAGESTREAMIO_MMAP, "error unmapping memory"); return IMAGESTREAMIO_MMAP; } + // Isn't that needed ?? + image->used = 0; + image->semptr = NULL; + image->md = NULL; + image->kw = NULL; + + image->array.raw = NULL; + + return IMAGESTREAMIO_SUCCESS; } diff --git a/ImageStreamIO.h b/ImageStreamIO.h index f3aecfa..62d7ab7 100644 --- a/ImageStreamIO.h +++ b/ImageStreamIO.h @@ -26,6 +26,10 @@ extern "C" #include "ImageStruct.h" +#ifdef COVERAGE_BUILD +void _gcov_dump(); +#endif + void __attribute__((constructor)) libinit_ImageStreamIO(); #define ROUND_UP_8(x) (((x) + 7) & (-8)) @@ -49,7 +53,7 @@ errno_t ImageStreamIO_set_default_printError(); * \returns IMAGESTREAMIO_FAILURE on an error */ errno_t ImageStreamIO_set_printError(errno_t (*new_printError)(const char *, - const char *, int, errno_t, char *)); + const char *, int, errno_t, const char *)); /* =============================================================================================== */ diff --git a/ImageStreamIO_ipc_registry.c b/ImageStreamIO_ipc_registry.c new file mode 100644 index 0000000..8e12345 --- /dev/null +++ b/ImageStreamIO_ipc_registry.c @@ -0,0 +1,65 @@ +/** + * @file ImageStreamIO_ipc_registry.c + * @brief Process-local registry for CUDA IPC memory handles — implementation. + * + * See ImageStreamIO_ipc_registry.h for the API description. + * + * NOTE: these functions are not thread-safe. If concurrent access from + * multiple threads is needed, protect each function body with a + * pthread_mutex_t. + */ + +#include "ImageStreamIO_ipc_registry.h" + +#include /* memcmp */ + +/* Storage for the registry table (defined here, declared extern in the header). */ +IsioIpcEntry _isio_ipc_registry[ISIO_MAX_IPC_HANDLES]; + +void *_isio_ipc_lookup(const cudaIpcMemHandle_t *h) +{ + for (int i = 0; i < ISIO_MAX_IPC_HANDLES; i++) { + if (_isio_ipc_registry[i].refcount > 0 && + memcmp(&_isio_ipc_registry[i].handle, h, sizeof(*h)) == 0) + return _isio_ipc_registry[i].ptr; + } + return NULL; +} + +void _isio_ipc_insert(const cudaIpcMemHandle_t *h, void *ptr) +{ + for (int i = 0; i < ISIO_MAX_IPC_HANDLES; i++) { + if (_isio_ipc_registry[i].refcount == 0) { + _isio_ipc_registry[i].handle = *h; + _isio_ipc_registry[i].ptr = ptr; + _isio_ipc_registry[i].refcount = 1; + return; + } + } +} + +void _isio_ipc_retain(const cudaIpcMemHandle_t *h) +{ + for (int i = 0; i < ISIO_MAX_IPC_HANDLES; i++) { + if (_isio_ipc_registry[i].refcount > 0 && + memcmp(&_isio_ipc_registry[i].handle, h, sizeof(*h)) == 0) { + _isio_ipc_registry[i].refcount++; + return; + } + } +} + +void _isio_ipc_release(const cudaIpcMemHandle_t *h) +{ + for (int i = 0; i < ISIO_MAX_IPC_HANDLES; i++) { + if (_isio_ipc_registry[i].refcount > 0 && + memcmp(&_isio_ipc_registry[i].handle, h, sizeof(*h)) == 0) { + _isio_ipc_registry[i].refcount--; + if (_isio_ipc_registry[i].refcount == 0) { + cudaIpcCloseMemHandle(_isio_ipc_registry[i].ptr); + _isio_ipc_registry[i].ptr = NULL; + } + return; + } + } +} diff --git a/ImageStreamIO_ipc_registry.h b/ImageStreamIO_ipc_registry.h new file mode 100644 index 0000000..0ba1a9f --- /dev/null +++ b/ImageStreamIO_ipc_registry.h @@ -0,0 +1,53 @@ +/** + * @file ImageStreamIO_ipc_registry.h + * @brief Process-local registry for CUDA IPC memory handles. + * + * cudaIpcOpenMemHandle is inter-process only: calling it a second time for + * the same handle within the same process returns cudaErrorInvalidDeviceContext. + * This registry maps handle bytes -> (device pointer, refcount) so subsequent + * opens within the same process are served from a cache instead of calling the + * CUDA API again. The handle is closed (and its slot freed) only when the + * refcount reaches 0. + * + * Storage and implementations live in ImageStreamIO_ipc_registry.c. + * + * NOTE: these functions are not thread-safe. If concurrent access from + * multiple threads is needed, protect each function body in the .c file with + * a pthread_mutex_t. + */ + +#ifndef IMAGESTREAMIO_IPC_REGISTRY_H +#define IMAGESTREAMIO_IPC_REGISTRY_H + +#ifndef HAVE_CUDA +#error "ImageStreamIO_ipc_registry.h requires HAVE_CUDA to be defined (CUDA support must be enabled)" +#endif + +#include + +#define ISIO_MAX_IPC_HANDLES 128 + +typedef struct { + cudaIpcMemHandle_t handle; + void *ptr; + int refcount; /* 0 means slot is free */ +} IsioIpcEntry; + +extern IsioIpcEntry _isio_ipc_registry[ISIO_MAX_IPC_HANDLES]; + +/** @brief Return the cached device pointer for h, or NULL if not found. */ +void *_isio_ipc_lookup(const cudaIpcMemHandle_t *h); + +/** @brief Insert a new handle+pointer pair with refcount=1. + * No-op if the table is full. */ +void _isio_ipc_insert(const cudaIpcMemHandle_t *h, void *ptr); + +/** @brief Increment the refcount for h. No-op if h is not in the registry. */ +void _isio_ipc_retain(const cudaIpcMemHandle_t *h); + +/** @brief Decrement the refcount for h. + * Calls cudaIpcCloseMemHandle and frees the slot when count reaches 0. + * No-op if h is not in the registry. */ +void _isio_ipc_release(const cudaIpcMemHandle_t *h); + +#endif /* IMAGESTREAMIO_IPC_REGISTRY_H */ diff --git a/python_module/CMakeLists.txt b/python_module/CMakeLists.txt index 3ae2da6..a4146ff 100644 --- a/python_module/CMakeLists.txt +++ b/python_module/CMakeLists.txt @@ -1,39 +1,47 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.17) -project(ImageStreamIOWrap LANGUAGES CXX) +set(LIBNAME "ImageStreamIOWrap") +project(${LIBNAME} LANGUAGES CXX) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) -find_package(pybind11) - -if(NOT TARGET pybind11) - execute_process(COMMAND bash -c "${PYTHON_EXECUTABLE} -m pybind11 --includes" - OUTPUT_VARIABLE pybind11_inc) - execute_process(COMMAND bash -c "${PYTHON_EXECUTABLE} -c 'import sysconfig; print(sysconfig.get_config_var(\"EXT_SUFFIX\"))'" - OUTPUT_VARIABLE PYTHON_MODULE_EXTENSION) - string(REPLACE "-I" "" pybind11_inc ${pybind11_inc}) - string(REPLACE " " ";" pybind11_inc ${pybind11_inc}) - string(REGEX REPLACE "\n$" "" pybind11_inc "${pybind11_inc}") - string(REGEX REPLACE "\n$" "" PYTHON_MODULE_EXTENSION "${PYTHON_MODULE_EXTENSION}") - - add_library(ImageStreamIOWrap MODULE ImageStreamIOWrap.cpp) - - target_compile_features(ImageStreamIOWrap PUBLIC cxx_std_14) - target_include_directories(ImageStreamIOWrap PUBLIC "${pybind11_inc}") - target_compile_options(ImageStreamIOWrap PUBLIC "-Wno-deprecated-declarations") - set_target_properties(ImageStreamIOWrap - PROPERTIES PREFIX - "${PYTHON_MODULE_PREFIX}" - SUFFIX - "${PYTHON_MODULE_EXTENSION}") -else() - pybind11_add_module(ImageStreamIOWrap ImageStreamIOWrap.cpp) +# ======================================= +# BUILD TYPE +# ======================================= +# Set a default build type if none was specified +set(default_build_type "Release") +if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Setting build type to '${default_build_type}' as none was specified.") + set(CMAKE_BUILD_TYPE "${default_build_type}") endif() -target_link_libraries(ImageStreamIOWrap PUBLIC ImageStreamIO) +message("CMAKE_BUILD_TYPE : ${CMAKE_BUILD_TYPE}") + +# ======================================= +# NANOBIND +# ======================================= +find_package(Python 3.9 COMPONENTS Interpreter Development.Module REQUIRED) +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT) +find_package(nanobind CONFIG REQUIRED) +nanobind_add_module(${LIBNAME} ImageStreamIOWrap.cpp) + +# ======================================= +# COVERAGE BUILD +# ======================================= +if (CMAKE_BUILD_TYPE STREQUAL "Coverage") + target_compile_definitions(${LIBNAME} PRIVATE COVERAGE_BUILD=1) + target_link_options(${LIBNAME} PRIVATE --coverage) + target_link_libraries(${LIBNAME} PRIVATE gcov) +endif() + + + +target_link_libraries(${LIBNAME} PUBLIC ImageStreamIO) -install(TARGETS ImageStreamIOWrap - EXPORT ImageStreamIOWrapConfig +install(TARGETS ${LIBNAME} + EXPORT ${LIBNAME}Config ARCHIVE DESTINATION python LIBRARY DESTINATION python) diff --git a/python_module/ImageStreamIOWrap.cpp b/python_module/ImageStreamIOWrap.cpp index 5571139..eb4890a 100755 --- a/python_module/ImageStreamIOWrap.cpp +++ b/python_module/ImageStreamIOWrap.cpp @@ -1,15 +1,18 @@ -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include +#include #include "ImageStreamIO.h" #include "ImageStruct.h" -namespace py = pybind11; +namespace nb = nanobind; std::string toString(const IMAGE_KEYWORD &kw) { std::ostringstream tmp_str; @@ -97,173 +100,128 @@ const std::vector ImageStreamIODataType::Size( SIZEOF_DATATYPE_DOUBLE, SIZEOF_DATATYPE_COMPLEX_FLOAT, SIZEOF_DATATYPE_COMPLEX_DOUBLE, SIZEOF_DATATYPE_HALF}); -std::string ImageStreamIODataTypeToPyFormat(ImageStreamIODataType dt) { - switch (dt.datatype) { - case ImageStreamIODataType::DataType::UINT8: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::INT8: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::UINT16: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::INT16: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::UINT32: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::INT32: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::UINT64: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::INT64: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::FLOAT: - return py::format_descriptor::format(); - case ImageStreamIODataType::DataType::DOUBLE: - return py::format_descriptor::format(); - // case ImageStreamIODataType::DataType::COMPLEX_FLOAT: return - // py::format_descriptor<(std::complex>::format(); case - // ImageStreamIODataType::DataType::COMPLEX_DOUBLE: return - // py::format_descriptor<(std::complex>::format(); - default: - throw std::runtime_error("Number format implemented"); - } -} - -ImageStreamIODataType PyFormatToImageStreamIODataType(const py::buffer_info &info) { - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::UINT8; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::INT8; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::UINT16; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::INT16; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::UINT32; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::INT32; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::UINT64; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::INT64; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::FLOAT; - } - if (info.item_type_is_equivalent_to()) { - return ImageStreamIODataType::DataType::DOUBLE; - } - // case ImageStreamIODataType::DataType::COMPLEX_FLOAT: return - // py::format_descriptor<(std::complex>::format(); case - // ImageStreamIODataType::DataType::COMPLEX_DOUBLE: return - // py::format_descriptor<(std::complex>::format(); - throw std::runtime_error("PyFormatToImageStreamIODataType -- Not implemented datatype (possibly endianess issue.)"); +// Helper: map ndarray dtype to ImageStreamIO datatype +template +uint8_t NdarrayDtypeToImageStreamIODataType(const nb::ndarray &arr) { + auto dt = arr.dtype(); + if (dt == nb::dtype()) return _DATATYPE_UINT8; + if (dt == nb::dtype()) return _DATATYPE_INT8; + if (dt == nb::dtype()) return _DATATYPE_UINT16; + if (dt == nb::dtype()) return _DATATYPE_INT16; + if (dt == nb::dtype()) return _DATATYPE_UINT32; + if (dt == nb::dtype()) return _DATATYPE_INT32; + if (dt == nb::dtype()) return _DATATYPE_UINT64; + if (dt == nb::dtype()) return _DATATYPE_INT64; + if (dt == nb::dtype()) return _DATATYPE_FLOAT; + if (dt == nb::dtype()) return _DATATYPE_DOUBLE; + throw std::runtime_error( + "NdarrayDtypeToImageStreamIODataType -- Not implemented datatype"); } template -py::array_t convert_img(const IMAGE &img) { +nb::object convert_img(const IMAGE &img) { if (ImageStreamIO_typesize(img.md->datatype) != sizeof(T)) { throw std::runtime_error("IMAGE is not compatible with output format"); } - std::vector shape(img.md->naxis); - std::vector strides(img.md->naxis); - ssize_t stride = sizeof(T); - - // Row Major representation - // for (int8_t axis(img.md->naxis-1); axis >= 0; --axis) { - // Col Major representation - for (int8_t axis(0); axis < img.md->naxis; ++axis) { - shape[axis] = img.md->size[axis]; - strides[axis] = stride; - stride *= shape[axis]; - } + size_t nelement = img.md->nelement; + T *data = new T[nelement]; - auto ret_buffer = py::array_t(shape, strides); - void *current_image = img.array.raw; - size_t size_data = img.md->nelement * sizeof(T); if (img.md->location == -1) { - memcpy(ret_buffer.mutable_data(), current_image, size_data); + memcpy(data, img.array.raw, nelement * sizeof(T)); } else { #ifdef HAVE_CUDA cudaSetDevice(img.md->location); - cudaMemcpy(ret_buffer.mutable_data(), current_image, size_data, + cudaMemcpy(data, img.array.raw, nelement * sizeof(T), cudaMemcpyDeviceToHost); #else + delete[] data; throw std::runtime_error( "unsupported location, CACAO needs to be compiled with -DUSE_CUDA=ON"); #endif } - return ret_buffer; + + nb::capsule owner(data, [](void *p) noexcept { delete[] (T *)p; }); + + std::vector shape(img.md->naxis); + for (int8_t axis = 0; axis < img.md->naxis; ++axis) { + shape[axis] = img.md->size[axis]; + } + + return nb::cast(nb::ndarray( + data, img.md->naxis, shape.data(), owner, nullptr, nb::dtype(), + nb::device::cpu::value, 0, 'F')); } template -void write(IMAGE &img, - py::array_t b) { +nb::object view_img(const IMAGE &img) { + if (ImageStreamIO_typesize(img.md->datatype) != sizeof(T)) { + throw std::runtime_error("IMAGE is not compatible with output format"); + } + + std::vector shape(img.md->naxis); + for (int8_t axis = 0; axis < img.md->naxis; ++axis) { + shape[axis] = img.md->size[axis]; + } + + // No-op capsule: shared memory is externally managed + nb::capsule owner((void *)img.array.raw, [](void *) noexcept {}); + + return nb::cast(nb::ndarray( + (T *)img.array.raw, img.md->naxis, shape.data(), owner, nullptr, + nb::dtype(), nb::device::cpu::value, 0, 'F')); +} + +void write_img(IMAGE &img, nb::ndarray b) { if (img.array.raw == nullptr) { throw std::runtime_error("image not initialized"); } - /* Request a buffer descriptor from Python */ - py::buffer_info info = b.request(); - if (img.md->datatype != - PyFormatToImageStreamIODataType(info)) { + uint8_t datatype = NdarrayDtypeToImageStreamIODataType(b); + if (img.md->datatype != datatype) { throw std::invalid_argument("incompatible type"); } - if (info.ndim != img.md->naxis) { + if ((size_t)b.ndim() != (size_t)img.md->naxis) { throw std::invalid_argument("incompatible number of axis"); } - const uint32_t *size_ptr = img.md->size; - for (auto &dim : info.shape) { - if (*size_ptr != dim) { + for (size_t i = 0; i < b.ndim(); ++i) { + if (b.shape(i) != img.md->size[i]) { throw std::invalid_argument("incompatible shape"); } - ++size_ptr; } - ImageStreamIODataType dt(img.md->datatype); - uint8_t *buffer_ptr = (uint8_t *)info.ptr; - uint64_t size = img.md->nelement * dt.asize; + uint64_t size = img.md->nelement * ImageStreamIO_typesize(datatype); img.md->write = 1; // set this flag to 1 when writing data void *current_image = img.array.raw; if (img.md->location == -1) { - memcpy(current_image, buffer_ptr, size); + memcpy(current_image, b.data(), size); } else { #ifdef HAVE_CUDA cudaSetDevice(img.md->location); - cudaMemcpy(current_image, buffer_ptr, size, cudaMemcpyHostToDevice); + cudaMemcpy(current_image, b.data(), size, cudaMemcpyHostToDevice); #else throw std::runtime_error( "unsupported location, CACAO needs to be compiled with -DUSE_CUDA=ON"); #endif } - ImageStreamIO_sempost(&img, -1); - clock_gettime(CLOCK_ISIO, &img.md->lastaccesstime); - img.md->write = 0; // Done writing data - img.md->cnt0++; - img.md->cnt1++; + ImageStreamIO_UpdateIm(&img); } -PYBIND11_MODULE(ImageStreamIOWrap, m) { +NB_MODULE(ImageStreamIOWrap, m) { m.doc() = "CACAO ImageStreamIO python module"; +#ifdef COVERAGE_BUILD + m.def("_gcov_dump", &_gcov_dump); +#endif + auto imageDatatype = - py::class_(m, "ImageStreamIODataType") - .def(py::init([](uint8_t datatype) { - return std::unique_ptr( - new ImageStreamIODataType(datatype)); - })) - .def_readonly("size", &ImageStreamIODataType::asize) - .def_readonly("type", &ImageStreamIODataType::datatype) + nb::class_(m, "ImageStreamIODataType") + .def(nb::init()) + .def_ro("size", &ImageStreamIODataType::asize) + .def_ro("type", &ImageStreamIODataType::datatype) .def("__repr__", [](const ImageStreamIODataType &img_datatype) { std::ostringstream tmp_str; tmp_str << "datatype: " << img_datatype.datatype << std::endl; @@ -271,7 +229,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { return tmp_str.str(); }); - py::enum_(imageDatatype, "Type") + nb::enum_(imageDatatype, "Type") .value("UINT8", ImageStreamIODataType::DataType::UINT8) .value("INT8", ImageStreamIODataType::DataType::INT8) .value("UINT16", ImageStreamIODataType::DataType::UINT16) @@ -288,13 +246,10 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { .export_values(); auto imagetype = - py::class_(m, "ImageStreamIOType") - .def(py::init([](uint8_t type) { - return std::unique_ptr( - new ImageStreamIOType(type)); - })) - .def_property_readonly("axis", &ImageStreamIOType::get_axis) - .def_property_readonly("type", &ImageStreamIOType::get_type) + nb::class_(m, "ImageStreamIOType") + .def(nb::init()) + .def_prop_ro("axis", &ImageStreamIOType::get_axis) + .def_prop_ro("type", &ImageStreamIOType::get_type) .def("__repr__", [](const ImageStreamIOType &image_type) { std::ostringstream tmp_str; tmp_str << "type: " << image_type.get_type() << std::endl; @@ -302,7 +257,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { return tmp_str.str(); }); - py::enum_(imagetype, "Type") + nb::enum_(imagetype, "Type") .value("CIRCULAR_BUFFER_TYPE", ImageStreamIOType::Type::CIRCULAR_BUFFER_TYPE) .value("MATH_DATA_TYPE", ImageStreamIOType::Type::MATH_DATA_TYPE) @@ -318,97 +273,98 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { .export_values(); // IMAGE_KEYWORD interface - py::class_(m, "Image_kw") - .def(py::init( - []() { return std::unique_ptr(new IMAGE_KEYWORD()); })) - .def(py::init([](std::string name, int64_t numl, std::string comment) { + nb::class_(m, "Image_kw") + .def(nb::init<>()) + .def("__init__", + [](IMAGE_KEYWORD *kw, std::string name, int64_t numl, + std::string comment) { if (name.size() > KEYWORD_MAX_STRING) { throw std::invalid_argument("name too long"); } if (comment.size() > KEYWORD_MAX_COMMENT) { throw std::invalid_argument("comment too long"); } - auto kw = std::unique_ptr(new IMAGE_KEYWORD()); + new (kw) IMAGE_KEYWORD(); std::copy(name.begin(), name.end(), kw->name); kw->type = 'L'; kw->value.numl = numl; std::copy(comment.begin(), comment.end(), kw->comment); - return kw; - }), - py::arg("name"), py::arg("numl"), py::arg("comment") = "") - .def(py::init([](std::string name, double numf, std::string comment) { + }, + nb::arg("name"), nb::arg("numl"), nb::arg("comment") = "") + .def("__init__", + [](IMAGE_KEYWORD *kw, std::string name, double numf, + std::string comment) { if (name.size() > KEYWORD_MAX_STRING) { throw std::invalid_argument("name too long"); } if (comment.size() > KEYWORD_MAX_COMMENT) { throw std::invalid_argument("comment too long"); } - auto kw = std::unique_ptr(new IMAGE_KEYWORD()); + new (kw) IMAGE_KEYWORD(); std::copy(name.begin(), name.end(), kw->name); kw->type = 'D'; kw->value.numf = numf; std::copy(comment.begin(), comment.end(), kw->comment); - return kw; - }), - py::arg("name"), py::arg("numf"), py::arg("comment") = "") - .def(py::init( - [](std::string name, std::string valstr, std::string comment) { - if (name.size() > KEYWORD_MAX_STRING) { - throw std::invalid_argument("name too long"); - } - if (valstr.size() > KEYWORD_MAX_STRING) { - throw std::invalid_argument("valstr too long"); - } - if (comment.size() > KEYWORD_MAX_COMMENT) { - throw std::invalid_argument("comment too long"); - } - auto kw = std::unique_ptr(new IMAGE_KEYWORD()); - std::copy(name.begin(), name.end(), kw->name); - kw->type = 'S'; - std::copy(valstr.begin(), valstr.end(), kw->value.valstr); - std::copy(comment.begin(), comment.end(), kw->comment); - return kw; - }), - py::arg("name"), py::arg("valstr"), py::arg("comment") = "") - .def_readonly("name", &IMAGE_KEYWORD::name) - .def_readonly("type", &IMAGE_KEYWORD::type) - .def_property_readonly("value", - [](const IMAGE_KEYWORD &kw) -> py::object { + }, + nb::arg("name"), nb::arg("numf"), nb::arg("comment") = "") + .def("__init__", + [](IMAGE_KEYWORD *kw, std::string name, std::string valstr, + std::string comment) { + if (name.size() > KEYWORD_MAX_STRING) { + throw std::invalid_argument("name too long"); + } + if (valstr.size() > KEYWORD_MAX_STRING) { + throw std::invalid_argument("valstr too long"); + } + if (comment.size() > KEYWORD_MAX_COMMENT) { + throw std::invalid_argument("comment too long"); + } + new (kw) IMAGE_KEYWORD(); + std::copy(name.begin(), name.end(), kw->name); + kw->type = 'S'; + std::copy(valstr.begin(), valstr.end(), kw->value.valstr); + std::copy(comment.begin(), comment.end(), kw->comment); + }, + nb::arg("name"), nb::arg("valstr"), nb::arg("comment") = "") + .def_ro("name", &IMAGE_KEYWORD::name) + .def_ro("type", &IMAGE_KEYWORD::type) + .def_prop_ro("value", + [](const IMAGE_KEYWORD &kw) -> nb::object { switch (kw.type) { case 'L': - return py::int_(kw.value.numl); + return nb::int_(kw.value.numl); case 'D': - return py::float_(kw.value.numf); + return nb::float_(kw.value.numf); case 'S': - return py::str(kw.value.valstr); + return nb::str(kw.value.valstr); default: throw std::runtime_error("Unknown format"); } }) .def("__str__", [](const IMAGE_KEYWORD &kw) { return toString(kw); }) .def("__repr__", [](const IMAGE_KEYWORD &kw) { return toString(kw); }) - .def_readonly("comment", &IMAGE_KEYWORD::comment); + .def_ro("comment", &IMAGE_KEYWORD::comment); // STREAM_PROC_TRACE interface - py::class_(m, "Proc_trace") - .def_readonly("triggermode", &STREAM_PROC_TRACE::triggermode) - .def_readonly("pid_write", &STREAM_PROC_TRACE::procwrite_PID) - .def_readonly("trigger_inode", &STREAM_PROC_TRACE::trigger_inode) - .def_readonly("ts_procstart", &STREAM_PROC_TRACE::ts_procstart) - .def_readonly("ts_streamupdate", &STREAM_PROC_TRACE::ts_streamupdate) - .def_readonly("trigger_semindex", &STREAM_PROC_TRACE::trigsemindex) - .def_readonly("trigger_status", &STREAM_PROC_TRACE::triggerstatus) - .def_readonly("cnt0", &STREAM_PROC_TRACE::cnt0); + nb::class_(m, "Proc_trace") + .def_ro("triggermode", &STREAM_PROC_TRACE::triggermode) + .def_ro("pid_write", &STREAM_PROC_TRACE::procwrite_PID) + .def_ro("trigger_inode", &STREAM_PROC_TRACE::trigger_inode) + .def_ro("ts_procstart", &STREAM_PROC_TRACE::ts_procstart) + .def_ro("ts_streamupdate", &STREAM_PROC_TRACE::ts_streamupdate) + .def_ro("trigger_semindex", &STREAM_PROC_TRACE::trigsemindex) + .def_ro("trigger_status", &STREAM_PROC_TRACE::triggerstatus) + .def_ro("cnt0", &STREAM_PROC_TRACE::cnt0); // IMAGE_METADATA interface - py::class_(m, "Image_md") - // .def(py::init([]() { + nb::class_(m, "Image_md") + // .def(nb::init([]() { // return std::unique_ptr(new IMAGE_METADATA()); // })) - .def_readonly("version", &IMAGE_METADATA::version) - .def_readonly("name", &IMAGE_METADATA::name) - .def_readonly("naxis", &IMAGE_METADATA::naxis) - .def_property_readonly("size", + .def_ro("version", &IMAGE_METADATA::version) + .def_ro("name", &IMAGE_METADATA::name) + .def_ro("naxis", &IMAGE_METADATA::naxis) + .def_prop_ro("size", [](const IMAGE_METADATA &md) { std::vector dims(md.naxis); const uint32_t *ptr = md.size; @@ -418,54 +374,44 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { } return dims; }) - .def_readonly("nelement", &IMAGE_METADATA::nelement) - .def_property_readonly( + .def_ro("nelement", &IMAGE_METADATA::nelement) + .def_prop_ro( "datatype", [](const IMAGE_METADATA &md) { return ImageStreamIODataType(md.datatype).datatype; }) - .def_property_readonly( + .def_prop_ro( "imagetype", [](const IMAGE_METADATA &md) { return ImageStreamIOType(md.imagetype).get_type(); }) - .def_property_readonly( + .def_prop_ro( "creationtime", [](const IMAGE_METADATA &md) { - auto creation_time = - std::chrono::seconds{md.creationtime.tv_sec} + - std::chrono::nanoseconds{md.creationtime.tv_nsec}; - std::chrono::system_clock::time_point tp{creation_time}; - return tp; + return (double)md.creationtime.tv_sec + + (double)md.creationtime.tv_nsec * 1e-9; }) - .def_property_readonly( + .def_prop_ro( "lastaccesstime", [](const IMAGE_METADATA &md) { - auto creation_time = - std::chrono::seconds{md.lastaccesstime.tv_sec} + - std::chrono::nanoseconds{md.lastaccesstime.tv_nsec}; - std::chrono::system_clock::time_point tp{creation_time}; - return tp; + return (double)md.lastaccesstime.tv_sec + + (double)md.lastaccesstime.tv_nsec * 1e-9; }) - .def_property_readonly( + .def_prop_ro( "acqtime", [](const IMAGE_METADATA &md) { - auto acqtime = std::chrono::seconds{md.atime.tv_sec} + - std::chrono::nanoseconds{md.atime.tv_nsec}; - std::chrono::system_clock::time_point tp{acqtime}; - return tp; + return (double)md.atime.tv_sec + + (double)md.atime.tv_nsec * 1e-9; }) - .def_property_readonly( + .def_prop_ro( "writetime", [](const IMAGE_METADATA &md) { - auto writetime = std::chrono::seconds{md.writetime.tv_sec} + - std::chrono::nanoseconds{md.writetime.tv_nsec}; - std::chrono::system_clock::time_point tp{writetime}; - return tp; + return (double)md.writetime.tv_sec + + (double)md.writetime.tv_nsec * 1e-9; }) - .def_readonly("shared", &IMAGE_METADATA::shared) - .def_readonly("location", &IMAGE_METADATA::location) - .def_property_readonly("location_str", + .def_ro("shared", &IMAGE_METADATA::shared) + .def_ro("location", &IMAGE_METADATA::location) + .def_prop_ro("location_str", [](const IMAGE_METADATA &md) { if (md.location < 0) { return std::string("CPU RAM"); @@ -475,16 +421,16 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { tmp_str << "GPU" << int(md.location) << " RAM"; return tmp_str.str(); }) - .def_readonly("status", &IMAGE_METADATA::status) - .def_readonly("inode", &IMAGE_METADATA::inode) - .def_readonly("logflag", &IMAGE_METADATA::logflag) - .def_readonly("sem", &IMAGE_METADATA::sem) - .def_readonly("cnt0", &IMAGE_METADATA::cnt0) - .def_readonly("cnt1", &IMAGE_METADATA::cnt1) - .def_readonly("cnt2", &IMAGE_METADATA::cnt2) - .def_readonly("write", &IMAGE_METADATA::write) - .def_readonly("flag", &IMAGE_METADATA::flag) - .def_readonly("NBkw", &IMAGE_METADATA::NBkw) + .def_ro("status", &IMAGE_METADATA::status) + .def_ro("inode", &IMAGE_METADATA::inode) + .def_ro("logflag", &IMAGE_METADATA::logflag) + .def_ro("sem", &IMAGE_METADATA::sem) + .def_ro("cnt0", &IMAGE_METADATA::cnt0) + .def_ro("cnt1", &IMAGE_METADATA::cnt1) + .def_ro("cnt2", &IMAGE_METADATA::cnt2) + .def_ro("write", &IMAGE_METADATA::write) + .def_ro("flag", &IMAGE_METADATA::flag) + .def_ro("NBkw", &IMAGE_METADATA::NBkw) .def("__repr__", [](const IMAGE_METADATA &md) { std::ostringstream tmp_str; tmp_str << "Name: " << md.name << std::endl; @@ -497,28 +443,22 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { tmp_str << "nelement: " << md.nelement << std::endl; // tmp_str << "datatype: " << md.datatype << std::endl; // tmp_str << "imagetype: " << md.imagetype << std::endl; - { - auto creationtime = std::chrono::seconds{md.creationtime.tv_sec} + - std::chrono::nanoseconds{md.creationtime.tv_nsec}; - std::chrono::system_clock::time_point tp{creationtime}; - std::time_t t = std::chrono::system_clock::to_time_t(tp); + { // FIXME we lost the time decimals in this process ! + std::time_t t = (std::time_t)md.creationtime.tv_sec; tmp_str << "creationtime: " << std::ctime(&t); } { - auto lastaccesstime = - std::chrono::seconds{md.lastaccesstime.tv_sec} + - std::chrono::nanoseconds{md.lastaccesstime.tv_nsec}; - std::chrono::system_clock::time_point tp{lastaccesstime}; - std::time_t t = std::chrono::system_clock::to_time_t(tp); + std::time_t t = (std::time_t)md.lastaccesstime.tv_sec; tmp_str << "lastaccesstime: " << std::ctime(&t); } { - auto acqtime = std::chrono::seconds{md.atime.tv_sec} + - std::chrono::nanoseconds{md.atime.tv_nsec}; - std::chrono::system_clock::time_point tp{acqtime}; - std::time_t t = std::chrono::system_clock::to_time_t(tp); + std::time_t t = (std::time_t)md.atime.tv_sec; tmp_str << "acqtime: " << std::ctime(&t); } + { + std::time_t t = (std::time_t)md.writetime.tv_sec; + tmp_str << "writetime: " << std::ctime(&t); + } tmp_str << "shared: " << int(md.shared) << std::endl; tmp_str << "location: "; if (md.location < 0) { @@ -537,28 +477,27 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { }); // IMAGE interface - py::class_(m, "Image", py::buffer_protocol()) - .def(py::init([]() { return std::unique_ptr(new IMAGE()); })) - .def_readonly("used", &IMAGE::used) - .def_readonly("memsize", &IMAGE::memsize) - .def_readonly("md", &IMAGE::md) - .def_readonly("streamproctrace0", &IMAGE::streamproctrace) - .def_property_readonly( + nb::class_(m, "Image") + .def(nb::init<>()) + .def_ro("used", &IMAGE::used) + .def_ro("memsize", &IMAGE::memsize) + .def_ro("md", &IMAGE::md) + .def_ro("streamproctrace0", &IMAGE::streamproctrace) + .def_prop_ro( "shape", [](const IMAGE &img) { if (img.array.raw == nullptr) { throw std::runtime_error("image not initialized"); } - py::tuple dims(img.md->naxis); + std::vector dims(img.md->naxis); const uint32_t *ptr = img.md->size; - // std::copy(ptr, ptr + img.md->naxis, dims); for (int i{}; i < img.md->naxis; ++i) { dims[i] = ptr[i]; } return dims; }) - .def_property_readonly( + .def_prop_ro( "semReadPID", [](const IMAGE &img) { if (img.array.raw == nullptr) { @@ -570,7 +509,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { } return semReadPID; }) - .def_property_readonly( + .def_prop_ro( "acqtimearray", [](const IMAGE &img) { if (img.array.raw == nullptr) { @@ -579,18 +518,14 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { if (img.atimearray == NULL) { throw std::runtime_error("acqtimearray not initialized"); } - std::vector acqtimearray( - img.md->size[2]); + std::vector acqtimearray(img.md->size[2]); for (int i = 0; i < img.md->size[2]; ++i) { - auto acqtime = - std::chrono::seconds{img.atimearray[i].tv_sec} + - std::chrono::nanoseconds{img.atimearray[i].tv_nsec}; - std::chrono::system_clock::time_point tp{acqtime}; - acqtimearray[i] = tp; + acqtimearray[i] = (double)img.atimearray[i].tv_sec + + (double)img.atimearray[i].tv_nsec * 1e-9; } return acqtimearray; }) - .def_property_readonly( + .def_prop_ro( "writetimearray", [](const IMAGE &img) { if (img.array.raw == nullptr) { @@ -599,18 +534,14 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { if (img.writetimearray == NULL) { throw std::runtime_error("writetimearray not initialized"); } - std::vector writetimearray( - img.md->size[2]); + std::vector writetimearray(img.md->size[2]); for (int i = 0; i < img.md->size[2]; ++i) { - auto writetime = - std::chrono::seconds{img.writetimearray[i].tv_sec} + - std::chrono::nanoseconds{img.writetimearray[i].tv_nsec}; - std::chrono::system_clock::time_point tp{writetime}; - writetimearray[i] = tp; + writetimearray[i] = (double)img.writetimearray[i].tv_sec + + (double)img.writetimearray[i].tv_nsec * 1e-9; } return writetimearray; }) - .def_property_readonly( + .def_prop_ro( "cntarray", [](const IMAGE &img) { if (img.array.raw == nullptr) { @@ -626,7 +557,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { return cntarray; }) // TODO: fix flagarray never allocated and cause segfaults - // .def_property_readonly( + // .def_prop_ro( // "flagarray", // [](const IMAGE &img) { // if (img.array.raw == nullptr) { @@ -641,7 +572,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { // } // return flagarray; // }) - .def_property_readonly( + .def_prop_ro( "semWritePID", [](const IMAGE &img) { if (img.array.raw == nullptr) { @@ -725,42 +656,8 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { } }) - .def_buffer([](const IMAGE &img) -> py::buffer_info { - if (img.array.raw == nullptr) { - py::print("image not initialized"); - return py::buffer_info(); - } - if (img.md->location >= 0) { - py::print("Can not use this with a GPU buffer"); - return py::buffer_info(); - } - - ImageStreamIODataType dt(img.md->datatype); - std::string format = ImageStreamIODataTypeToPyFormat(dt); - std::vector shape(img.md->naxis); - std::vector strides(img.md->naxis); - ssize_t stride = dt.asize; - - // Row Major representation - // for (int8_t axis(img.md->naxis-1); axis >= 0; --axis) { - // Col Major representation - for (int8_t axis(0); axis < img.md->naxis; ++axis) { - shape[axis] = img.md->size[axis]; - strides[axis] = stride; - stride *= shape[axis]; - } - return py::buffer_info( - img.array.raw, /* Pointer to buffer */ - dt.asize, /* Size of one scalar */ - format, /* Python struct-style format descriptor */ - img.md->naxis, /* Number of dimensions */ - shape, /* Buffer dimensions */ - strides /* Strides (in bytes) for each index */ - ); - }) - .def("copy", - [](const IMAGE &img) -> py::object { + [](const IMAGE &img) -> nb::object { if (img.array.raw == nullptr) throw std::runtime_error("image not initialized"); ImageStreamIODataType dt(img.md->datatype); @@ -792,151 +689,81 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { } }) + .def("view", + [](const IMAGE &img) -> nb::object { + if (img.array.raw == nullptr) + throw std::runtime_error("image not initialized"); + if (img.md->location >= 0) + throw std::runtime_error( + "Cannot create a zero-copy view of a GPU buffer"); + ImageStreamIODataType dt(img.md->datatype); + switch (dt.datatype) { + case ImageStreamIODataType::DataType::UINT8: + return view_img(img); + case ImageStreamIODataType::DataType::INT8: + return view_img(img); + case ImageStreamIODataType::DataType::UINT16: + return view_img(img); + case ImageStreamIODataType::DataType::INT16: + return view_img(img); + case ImageStreamIODataType::DataType::UINT32: + return view_img(img); + case ImageStreamIODataType::DataType::INT32: + return view_img(img); + case ImageStreamIODataType::DataType::UINT64: + return view_img(img); + case ImageStreamIODataType::DataType::INT64: + return view_img(img); + case ImageStreamIODataType::DataType::FLOAT: + return view_img(img); + case ImageStreamIODataType::DataType::DOUBLE: + return view_img(img); + default: + throw std::runtime_error("Not implemented"); + } + }) + .def("update", [](IMAGE &img) { if (img.array.raw == nullptr) throw std::runtime_error("image not initialized"); return ImageStreamIO_UpdateIm(&img); }) - .def("update_atime", [](IMAGE &img, std::chrono::system_clock::time_point &tp) { + .def("update_atime", [](IMAGE &img, double timestamp) { if (img.array.raw == nullptr) throw std::runtime_error("image not initialized"); - auto secs = std::chrono::time_point_cast(tp); - auto ns = std::chrono::time_point_cast(tp) - - std::chrono::time_point_cast(secs); - - struct timespec atime = {secs.time_since_epoch().count(), ns.count()}; - + struct timespec atime; + atime.tv_sec = (time_t)timestamp; + atime.tv_nsec = (long)((timestamp - (double)atime.tv_sec) * 1e9); return ImageStreamIO_UpdateIm_atime(&img, &atime); }) - .def("write", &write, + .def("write", &write_img, R"pbdoc( Write into memory image stream Parameters: buffer [in]: buffer to put into memory image stream )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) - - .def("write", &write, - R"pbdoc( - Write into memory image stream - Parameters: - buffer [in]: buffer to put into memory image stream - )pbdoc", - py::arg("buffer")) + nb::arg("buffer")) .def( "create", - [](IMAGE &img, const std::string &name, const py::buffer &buffer, + [](IMAGE &img, const std::string &name, + nb::ndarray buffer, int8_t location, uint8_t shared, int NBsem, int NBkw, uint64_t imagetype, uint32_t CBsize) { - py::buffer_info info = buffer.request(); - - auto buf = pybind11::array::ensure(buffer); - - if (!buf) { - throw std::invalid_argument("input buffer is not an np.array"); - } - - uint8_t datatype = PyFormatToImageStreamIODataType(info); + uint8_t datatype = NdarrayDtypeToImageStreamIODataType(buffer); - uint32_t dims[buf.ndim()]; - for (int i = 0; i < buf.ndim(); ++i) { - dims[i] = buf.shape()[i]; + uint32_t dims[buffer.ndim()]; + for (size_t i = 0; i < buffer.ndim(); ++i) { + dims[i] = buffer.shape(i); } int res = ImageStreamIO_createIm_gpu( - &img, name.c_str(), buf.ndim(), dims, datatype, location, + &img, name.c_str(), buffer.ndim(), dims, datatype, location, shared, NBsem, NBkw, imagetype, CBsize); if (res == 0) { - if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else if (info.item_type_is_equivalent_to()) { - write(img, buffer); - } else { - throw std::invalid_argument("IMAGE::create -- unsupported array datatype"); - } + write_img(img, buffer); } return res; }, @@ -954,10 +781,10 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("name"), py::arg("buffer"), py::arg("location") = -1, - py::arg("shared") = 1, py::arg("NBsem") = IMAGE_NB_SEMAPHORE, - py::arg("NBkw") = 1, py::arg("imagetype") = MATH_DATA, - py::arg("CBsize") = 0) + nb::arg("name"), nb::arg("buffer"), nb::arg("location") = -1, + nb::arg("shared") = 1, nb::arg("NBsem") = IMAGE_NB_SEMAPHORE, + nb::arg("NBkw") = 1, nb::arg("imagetype") = MATH_DATA, + nb::arg("CBsize") = 0) // .def( // "create", @@ -989,9 +816,9 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { // Return: // ret [out]: error code // )pbdoc", - // py::arg("name"), py::arg("dims"), - // py::arg("datatype") = ImageStreamIODataType::DataType::FLOAT, - // py::arg("shared") = 1, py::arg("NBkw") = 1) + // nb::arg("name"), nb::arg("dims"), + // nb::arg("datatype") = ImageStreamIODataType::DataType::FLOAT, + // nb::arg("shared") = 1, nb::arg("NBkw") = 1) // .def( // "create", @@ -1025,11 +852,11 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { // Return: // ret [out]: error code // )pbdoc", - // py::arg("name"), py::arg("dims"), - // py::arg("datatype") = ImageStreamIODataType::DataType::FLOAT, - // py::arg("location") = -1, py::arg("shared") = 1, - // py::arg("NBsem") = IMAGE_NB_SEMAPHORE, py::arg("NBkw") = 1, - // py::arg("imagetype") = MATH_DATA) + // nb::arg("name"), nb::arg("dims"), + // nb::arg("datatype") = ImageStreamIODataType::DataType::FLOAT, + // nb::arg("location") = -1, nb::arg("shared") = 1, + // nb::arg("NBsem") = IMAGE_NB_SEMAPHORE, nb::arg("NBkw") = 1, + // nb::arg("imagetype") = MATH_DATA) .def( "open", @@ -1043,7 +870,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("name")) + nb::arg("name")) .def( "close", @@ -1071,7 +898,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { }, R"pbdoc( For a shared image: - Closes all semaphores, deallcoates sem pointers, + Closes all semaphores, deallocates sem pointers, and removes associated files. Unmaps the shared memory segment, and finally removes the file. Sets the metadata and keyword pointers to NULL. @@ -1097,7 +924,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: semaphore index available )pbdoc", - py::arg("index")) + nb::arg("index")) .def( "semwait", @@ -1114,7 +941,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("index")) + nb::arg("index")) .def( "semtimedwait", @@ -1136,7 +963,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("index"), py::arg("timeoutsec")) + nb::arg("index"), nb::arg("timeoutsec")) .def( "semtrywait", @@ -1153,7 +980,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("index")) + nb::arg("index")) .def( "sempost", @@ -1170,7 +997,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("index") = -1) + nb::arg("index") = -1) .def( "semflush", @@ -1187,7 +1014,7 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("index")) + nb::arg("index")) .def( "semvalue", @@ -1204,5 +1031,5 @@ PYBIND11_MODULE(ImageStreamIOWrap, m) { Return: ret [out]: error code )pbdoc", - py::arg("index")); + nb::arg("index")); }