diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 62f2477112c207a9743a66fca95580b3f06e3f2f..276a1841982bf1ef8d4393a6e8e780e7ccffe24f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,7 +16,9 @@ variables: # Configuration variables GKFS_LOG_LEVEL: "100" GKFS_DAEMON_LOG_PATH: "${CI_PROJECT_DIR}/logs/daemon.log" - GKFS_PRELOAD_LOG_PATH: "${CI_PROJECT_DIR}/logs/preload.log" + LIBGKFS_LOG: "all" + LIBGKFS_LOG_OUTPUT: "${CI_PROJECT_DIR}/logs/gkfs_client.log" + GIT_SUBMODULE_STRATEGY: recursive image: gekkofs/gekkofs:build_env @@ -49,6 +51,7 @@ compile GekkoFS: -Wdev -Wdeprecate -DCMAKE_BUILD_TYPE=Debug + -DRPC_PROTOCOL="ofi+sockets" -DCMAKE_PREFIX_PATH=${DEPS_INSTALL_PATH} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} ${CI_PROJECT_DIR} @@ -77,6 +80,7 @@ test wr: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_wr artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -88,6 +92,7 @@ test directories: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_dir artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -99,6 +104,7 @@ test truncate: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_truncate artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -110,6 +116,7 @@ test path resolution: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_path_resolution artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -121,6 +128,6 @@ test lseek: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_lseek artifacts: + when: on_failure paths: - "${LOG_PATH}" - diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..0a6a93c55f74be6f3b45ce4b38ebdad6a230beae --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/hermes"] + path = external/hermes + url = https://github.com/bsc-ssrg/hermes.git diff --git 
a/CMake/FindDate.cmake b/CMake/FindDate.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c60ce790619ba9c76f2f5e8b0bd27da85b96cfcd --- /dev/null +++ b/CMake/FindDate.cmake @@ -0,0 +1,40 @@ +find_path(DATE_INCLUDE_DIR + NAMES date/date.h +) + +find_path(TZ_INCLUDE_DIR + NAMES date/tz.h +) + +find_library(TZ_LIBRARY + NAMES tz +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( Date + DEFAULT_MSG + DATE_INCLUDE_DIR + TZ_INCLUDE_DIR + TZ_LIBRARY +) + +if(Date_FOUND) + set(DATE_INCLUDE_DIRS ${DATE_INCLUDE_DIR}) + set(TZ_INCLUDE_DIRS ${TZ_INCLUDE_DIR}) + set(TZ_LIBRARIES ${TZ_LIBRARY}) + + if(NOT TARGET Date::TZ) + add_library(Date::TZ UNKNOWN IMPORTED) + set_target_properties(Date::TZ PROPERTIES + IMPORTED_LOCATION "${TZ_LIBRARY}" + INTERFACE_COMPILE_DEFINITIONS "USE_OS_TZDB=1" + INTERFACE_INCLUDE_DIRECTORIES "${TZ_INCLUDE_DIR}" + ) + endif() +endif() + +mark_as_advanced( + DATE_INCLUDE_DIR + TZ_INCLUDE_DIR + TZ_LIBRARY +) diff --git a/CMakeLists.txt b/CMakeLists.txt index 04901c4cb9f7c20bcd96729209ab3f892d246d6a..30d7747011454a60e4075902271438dfe4a40ad2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,11 +25,15 @@ ENDIF (NOT CMAKE_BUILD_TYPE) message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") # Compiler flags for various cmake build types -set(WARNINGS_FLAGS "-Wall -Wextra --pedantic -Wno-unused-parameter") +set(WARNINGS_FLAGS "-Wall -Wextra --pedantic -Wno-unused-parameter -Wno-missing-field-initializers") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -O3") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0 -DGKFS_DEBUG_BUILD") set(CMAKE_CXX_FLAGS_MEMCHECK "${WARNINGS_FLAGS} -g -O0 -fsanitize=address -fno-omit-frame-pointer") set(CMAKE_CXX_FLAGS_MAINTAINER "${WARNINGS_FLAGS} -g -O0 -pg -no-pie") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DNDEBUG -O3") 
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0 -DGKFS_DEBUG_BUILD") +set(CMAKE_C_FLAGS_MEMCHECK "${WARNINGS_FLAGS} -g -O0 -fsanitize=address -fno-omit-frame-pointer") +set(CMAKE_C_FLAGS_MAINTAINER "${WARNINGS_FLAGS} -g -O0 -pg -no-pie") mark_as_advanced(CMAKE_CXX_FLAGS_MAINTAINER) # Project version @@ -94,6 +98,8 @@ find_package(Boost 1.53 REQUIRED find_package(Threads REQUIRED) +find_package(Date REQUIRED) + set(RPC_PROTOCOL "ofi+tcp" CACHE STRING "Communication plugin used for RPCs") set_property(CACHE RPC_PROTOCOL PROPERTY STRINGS "bmi+tcp" @@ -112,6 +118,29 @@ if(SYMLINK_SUPPORT) endif() message(STATUS "Symlink support: ${SYMLINK_SUPPORT}") +set(MAX_INTERNAL_FDS 256 CACHE STRING "Number of file descriptors reserved for internal use") +add_definitions(-DMAX_INTERNAL_FDS=${MAX_INTERNAL_FDS}) +message(STATUS "File descriptors reserved for internal use: ${MAX_INTERNAL_FDS}") + +execute_process(COMMAND getconf OPEN_MAX + OUTPUT_VARIABLE GETCONF_MAX_FDS + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) +if(NOT GETCONF_MAX_FDS) + set(GETCONF_MAX_FDS 512) +endif() +add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS}) + +option(ENABLE_CLIENT_LOG "Enable logging messages" ON) +if(ENABLE_CLIENT_LOG) + set(CLIENT_LOG_MESSAGE_SIZE 1024 CACHE STRING "Maximum size of a log message in the client library") + add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE}) + message(STATUS "Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}") +else() + add_definitions(-DGKFS_DISABLE_LOGGING) +endif() +message(STATUS "Client logging output: ${ENABLE_CLIENT_LOG}") + configure_file(include/global/configure.hpp.in include/global/configure.hpp) # Imported target @@ -153,6 +182,13 @@ set_target_properties(fmt INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/external/spdlog" ) +add_library(hermes INTERFACE) +# we cannot use target_include_directories with CMake < 3.11 +set_target_properties(hermes + PROPERTIES + 
INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/external/hermes/include" +) + set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include") # define include directories that are relevant for all targets diff --git a/README.md b/README.md index c5cb98b956a837fb70509965c8cc554950a96e0c..4918f429cef75366e10df3b0811bf7e98f1c020d 100644 --- a/README.md +++ b/README.md @@ -42,51 +42,62 @@ This is a file system. (execute the script for help): ```bash -usage: dl_dep.sh [-h] [-n ] [-c ] +usage: dl_dep.sh [-h] [-l] [-n ] [-c ] [-d ] source_path - + This script gets all GekkoFS dependency sources (excluding the fs itself) - + positional arguments: source_path path where the dependency downloads are put - - + + optional arguments: -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for download -n , --na network layer that is used for communication. Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters - supported clusters: {mogon1,fh2} + supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + download a specific dependency. If unspecified + all dependencies are built and installed. ``` - Now use the install script to compile them and install them to the desired directory. You can choose the according na_plugin (execute the script for help): ```bash -usage: compile_dep.sh [-h] [-n ] [-c ] [-j ] +usage: compile_dep.sh [-h] [-l] [-n ] [-c ] [-d ] [-j ] source_path install_path - + This script compiles all GekkoFS dependencies (excluding the fs itself) - + positional arguments: source_path path to the cloned dependencies path from clone_dep.sh install_path path to the install path of the compiled dependencies - - + + optional arguments: - -h, --help shows this help message and exits + -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for building and installation -n , --na network layer that is used for communication. 
Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + build and install a specific dependency. If unspecified + all dependencies are built and installed. -j , --compilecores - number of cores that are used to compile the depdencies + number of cores that are used to compile the dependencies defaults to number of available cores + -t, --test Perform libraries tests. ``` ## Compile GekkoFS @@ -127,15 +138,54 @@ Run the application with the preload library: `LD_PRELOAD=/build/lib/libio an MPI application use the `{mpirun, mpiexec} -x` argument. ### Logging -To enable logging the following environment variables are used: -GKFS_PRELOAD_LOG_PATH="" to set the path to the logging file of the client library. -GKFS_DAEMON_LOG_PATH="" to set the path to the logging file of the daemon. -GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace} to set the trace level verbosity. -Numbers from 0-6 may also be used where as 0 is off and 6 represents trace. +The following environment variables can be used to enable logging in the client +library: `LIBGKFS_LOG=` and `LIBGKFS_LOG_OUTPUT=` to +configure the output module and set the path to the log file of the client +library. If no path is specified in `LIBGKFS_LOG_OUTPUT`, the client library +will send log messages to `/tmp/gkfs_client.log`. + +The following modules are available: + + - `none`: don't print any messages + - `syscalls`: Trace system calls: print the name of each system call, its + arguments, and its return value. All system calls are printed after being + executed save for those that may not return, such as `execve()`, + `execveat()`, `exit()`, and `exit_group()`. This module will only be + available if the client library is built in `Debug` mode. + - `syscalls_at_entry`: Trace system calls: print the name of each system call + and its arguments. 
All system calls are printed before being executed and + therefore their return values are not available in the log. This module will + only be available if the client library is built in `Debug` mode. + - `info`: Print information messages. + - `critical`: Print critical errors. + - `errors`: Print errors. + - `warnings`: Print warnings. + - `mercury`: Print Mercury messages. + - `debug`: Print debug messages. This module will only be available if the + client library is built in `Debug` mode. + - `most`: All previous options combined except `syscalls_at_entry`. This + module will only be available if the client library is built in `Debug` + mode. + - `all`: All previous options combined. + - `help`: Print a help message and exit. + +When tracing system calls, specific syscalls can be removed from log messages by +setting the `LIBGKFS_LOG_SYSCALL_FILTER` environment variable. For instance, +setting it to `LIBGKFS_LOG_SYSCALL_FILTER=epoll_wait,epoll_create` will filter +out any log entries from the `epoll_wait()` and `epoll_create()` system calls. + +Additionally, setting the `LIBGKFS_LOG_OUTPUT_TRUNC` environment variable with +a value different from `0` will instruct the logging subsystem to truncate +the file used for logging, rather than append to it. + +For the daemon, the `GKFS_DAEMON_LOG_PATH=` environment variable +can be provided to set the path to the log file, and the log level can be +selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}` +environment variable. ### Acknowledgment This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). -This software was partially supported by the ADA-FS project under the SPPEXA project funded by the DFG. \ No newline at end of file +This software was partially supported by the ADA-FS project under the SPPEXA project funded by the DFG. 
diff --git a/external/hermes b/external/hermes new file mode 160000 index 0000000000000000000000000000000000000000..0c62b3319e660a5a30d0ad24a8ddaec8924b6388 --- /dev/null +++ b/external/hermes @@ -0,0 +1 @@ +Subproject commit 0c62b3319e660a5a30d0ad24a8ddaec8924b6388 diff --git a/include/client/adafs_functions.hpp b/include/client/adafs_functions.hpp index 2c304f61603fc5ea3475a8e578282487298ff674..6e82913e8b6cef78248030f6cd30573e682d5106 100644 --- a/include/client/adafs_functions.hpp +++ b/include/client/adafs_functions.hpp @@ -69,6 +69,10 @@ int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count); +int getdents64(unsigned int fd, + struct linux_dirent64 *dirp, + unsigned int count); + int adafs_rmdir(const std::string& path); #endif //IFS_ADAFS_FUNCTIONS_HPP diff --git a/include/client/env.hpp b/include/client/env.hpp new file mode 100644 index 0000000000000000000000000000000000000000..47e4b3692e98d163030921a4e94733fbca3a9c59 --- /dev/null +++ b/include/client/env.hpp @@ -0,0 +1,42 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_CLIENT_ENV +#define GKFS_CLIENT_ENV + +#include + +#define ADD_PREFIX(str) CLIENT_ENV_PREFIX str + +/* Environment variables for the GekkoFS client */ +namespace gkfs { +namespace env { + +static constexpr auto LOG = ADD_PREFIX("LOG"); + +#ifdef GKFS_DEBUG_BUILD +static constexpr auto LOG_SYSCALL_FILTER = ADD_PREFIX("LOG_SYSCALL_FILTER"); +#endif + +static constexpr auto LOG_OUTPUT = ADD_PREFIX("LOG_OUTPUT"); +static constexpr auto LOG_OUTPUT_TRUNC = ADD_PREFIX("LOG_OUTPUT_TRUNC"); +static constexpr auto CWD = ADD_PREFIX("CWD"); +static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); + +} // namespace env +} // namespace gkfs + +#undef ADD_PREFIX + +#endif // GKFS_CLIENT_ENV + diff --git a/include/client/hooks.hpp b/include/client/hooks.hpp index bf5d621dc6b348962850e12a5826c86a3c6c057e..4e978a795a4248aaa82921df91207faddfe0d642 100644 --- a/include/client/hooks.hpp +++ b/include/client/hooks.hpp @@ -14,9 +14,9 @@ #ifndef IFS_HOOKS_HPP #define IFS_HOOKS_HPP +#include #include - int hook_openat(int dirfd, const char *cpath, int flags, mode_t mode); int hook_close(int fd); int hook_stat(const char* path, struct stat* buf); @@ -41,6 +41,7 @@ int hook_dup(unsigned int fd); int hook_dup2(unsigned int oldfd, unsigned int newfd); int hook_dup3(unsigned int oldfd, unsigned int newfd, int flags); int hook_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count); +int hook_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count); int hook_mkdirat(int dirfd, const char * cpath, mode_t mode); int hook_fchmodat(int dirfd, const char* path, mode_t mode); int hook_fchmod(unsigned int dirfd, mode_t mode); diff --git a/include/client/intercept.hpp b/include/client/intercept.hpp index 9ba21eeb500a4d7a8e4f916f0d6651b9f74d1c82..f3b590d92aff5913cc679e634ae51043905bd29f 100644 --- a/include/client/intercept.hpp +++ b/include/client/intercept.hpp @@ -14,12 +14,19 @@ #ifndef IFS_INTERCEPT_HPP 
#define IFS_INTERCEPT_HPP +int +internal_hook_guard_wrapper(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *syscall_return_value); + int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *syscall_return_value); +void start_self_interception(); void start_interception(); void stop_interception(); diff --git a/include/client/logging.hpp b/include/client/logging.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2fe5c89459282944365ac45b7c03da0c3b948092 --- /dev/null +++ b/include/client/logging.hpp @@ -0,0 +1,526 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef LIBGKFS_LOGGING_HPP +#define LIBGKFS_LOGGING_HPP + +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef GKFS_DEBUG_BUILD +#include +#endif + +namespace gkfs { +namespace log { + +enum class log_level : short { + print_syscalls = 1 << 0, + print_syscalls_entry = 1 << 1, + print_info = 1 << 2, + print_critical = 1 << 3, + print_errors = 1 << 4, + print_warnings = 1 << 5, + print_mercury = 1 << 6, + print_debug = 1 << 7, + + // for internal use + print_none = 0, + print_all = print_syscalls | print_syscalls_entry | print_info | + print_critical | print_errors | print_warnings | + print_mercury | print_debug, + print_most = print_all & ~print_syscalls_entry, + print_help = 1 << 10 +}; + +inline constexpr log_level +operator&(log_level l1, log_level l2) { + return log_level(static_cast(l1) & + static_cast(l2)); +} + +inline constexpr log_level +operator|(log_level l1, log_level l2) { + return log_level(static_cast(l1) | + static_cast(l2)); +} + +inline constexpr log_level +operator^(log_level l1, log_level l2) { + return log_level(static_cast(l1) ^ + static_cast(l2)); +} + +inline constexpr log_level +operator~(log_level l1) { + return log_level(~static_cast(l1)); +} + +inline constexpr bool +operator!(log_level dm) { + return static_cast(dm) == 0; +} + +inline const log_level& +operator|=(log_level& l1, log_level l2) { + return l1 = l1 | l2; +} + +inline const log_level& +operator&=(log_level& l1, log_level l2) { + return l1 = l1 & l2; +} + +inline const log_level& +operator^=(log_level& l1, log_level l2) { + return l1 = l1 ^ l2; +} + + +static const auto constexpr syscall = log_level::print_syscalls; +static const auto constexpr syscall_at_entry = log_level::print_syscalls_entry; +static const auto constexpr info = log_level::print_info; +static const auto constexpr critical = log_level::print_critical; +static const auto constexpr error = log_level::print_errors; +static const auto 
constexpr warning = log_level::print_warnings; +static const auto constexpr mercury = log_level::print_mercury; +static const auto constexpr debug = log_level::print_debug; +static const auto constexpr none = log_level::print_none; +static const auto constexpr most = log_level::print_most; +static const auto constexpr all = log_level::print_all; +static const auto constexpr help = log_level::print_help; + +static const auto constexpr level_names = + utils::make_array( + "syscall", + "syscall", // syscall_entry uses the same name as syscall + "info", + "critical", + "error", + "warning", + "mercury", + "debug" +); + +inline constexpr auto +lookup_level_name(log_level l) { + + assert(l != log::none && l != log::help); + + // since all log levels are powers of 2, we can find a name + // very efficiently by counting the number of trailing 0-bits in l + const auto i = __builtin_ctz(static_cast(l)); + assert(i >= 0 && static_cast(i) < level_names.size()); + + return level_names.at(i); +} + + +// forward declaration +struct logger; + +namespace detail { + +template +static inline void +log_buffer(std::FILE* fp, + Buffer&& buffer) { + log_buffer(::fileno(fp), std::forward(buffer)); +} + +template +static inline void +log_buffer(int fd, + Buffer&& buffer) { + + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + + ::syscall_no_intercept(SYS_write, fd, buffer.data(), buffer.size()); +} + +static inline void +log_buffer(int fd, + const void* buffer, + std::size_t length) { + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + + ::syscall_no_intercept(SYS_write, fd, buffer, length); +} + +/** + * format_timestamp_to - safely format a timestamp for logging messages + * + * This function produces a timestamp that can be used to prefix logging + * messages. 
Since we are actively intercepting system calls, the formatting + * MUST NOT rely on internal system calls, otherwise we risk recursively + * calling ourselves for each syscall generated. Also, we cannot rely on + * the C formatting functions asctime, ctime, gmtime, localtime, mktime, + * asctime_r, ctime_r, gmtime_r, localtime_r, since they acquire a + * non-reentrant lock to determine the caller's timezone (yes, the assumedly + * reentrant *_r versions of the functions exhibit this problem as well, + * see https://sourceware.org/bugzilla/show_bug.cgi?id=16145). To solve this + * issue and still get readable timestamps, we determine and cache the + * timezone when the logger is created so that the lock is only held once, by + * one thread exactly, and we pass it as an argument whenever we need to + * format a timestamp. If no timezone is provided, we just format the epoch. + * + * NOTE: we use the date C++ library to query the timezone database and + * to format the timestamps. + */ +template +static inline void +format_timestamp_to(Buffer&& buffer, + const date::time_zone * const timezone = nullptr) { + + struct ::timeval tv; + + int rv = ::syscall_no_intercept(SYS_gettimeofday, &tv, NULL); + + if(::syscall_error_code(rv) != 0) { + return; + } + + date::sys_time now{ + std::chrono::seconds{tv.tv_sec} + + std::chrono::microseconds{tv.tv_usec}}; + + if(!timezone) { + fmt::format_to(buffer, "[{}] ", now.time_since_epoch().count()); + return; + } + + fmt::format_to(buffer, "[{}] ", + date::zoned_time{timezone, now}); +} + +template +static inline void +format_syscall_info_to(Buffer&& buffer, + gkfs::syscall::info info) { + + const auto ttid = syscall_no_intercept(SYS_gettid); + fmt::format_to(buffer, "[{}] [syscall] ", ttid); + + char o; + char t; + + switch(gkfs::syscall::origin(info)) { + case gkfs::syscall::from_internal_code: + o = 'i'; + break; + case gkfs::syscall::from_external_code: + o = 'a'; + break; + default: + o = '?'; + break; + } + + 
switch(gkfs::syscall::target(info)) { + case gkfs::syscall::to_hook: + t = 'h'; + break; + case gkfs::syscall::to_kernel: + t = 'k'; + break; + default: + t = '?'; + break; + } + + const std::array tmp = {'[', o, t, ']', ' '}; + fmt::format_to(buffer, fmt::string_view(tmp.data(), tmp.size())); +} + +} // namespace detail + +enum { max_buffer_size = LIBGKFS_LOG_MESSAGE_SIZE }; + +struct static_buffer : public fmt::basic_memory_buffer { + +protected: + void grow(std::size_t size) override final; +}; + + +struct logger { + + logger(const std::string& opts, + const std::string& path, + bool trunc +#ifdef GKFS_DEBUG_BUILD + , const std::string& filter // syscall filter, debug builds only +#endif + ); + + ~logger(); + + template + inline void + log(log_level level, + const char * const func, + const int lineno, + Args&&... args) { + + if(!(level & log_mask_)) { + return; + } + + static_buffer buffer; + detail::format_timestamp_to(buffer, timezone_); + fmt::format_to(buffer, "[{}] [{}] ", + ::syscall_no_intercept(SYS_gettid), + lookup_level_name(level)); + + if(!!(level & log::debug)) { + fmt::format_to(buffer, "<{}():{}> ", func, lineno); + } + + fmt::format_to(buffer, std::forward(args)...); + fmt::format_to(buffer, "\n"); + detail::log_buffer(log_fd_, buffer); + } + + inline int + log(log_level level, + const char *fmt, + va_list ap) { + + if(!(level & log_mask_)) { + return 0; + } + + // we use buffer views to compose the logging messages to + // avoid copying buffers as much as possible + struct buffer_view { + const void* addr; + std::size_t size; + }; + + // helper lambda to print an iterable of buffer_views + const auto log_buffer_views = + [this](const auto& buffers) { + + std::size_t n = 0; + + for(const auto& bv : buffers) { + if(bv.addr != nullptr) { + detail::log_buffer(log_fd_, bv.addr, bv.size); + n += bv.size; + } + } + + return n; + }; + + + + static_buffer prefix; + detail::format_timestamp_to(prefix); + fmt::format_to(prefix, "[{}] [{}] ", + ::syscall_no_intercept(SYS_gettid), + 
lookup_level_name(level)); + + char buffer[max_buffer_size]; + const int n = vsnprintf(buffer, sizeof(buffer), fmt, ap); + + std::array buffers{}; + + int i = 0; + int m = 0; + const char* addr = buffer; + const char* p = nullptr; + while((p = std::strstr(addr, "\n")) != nullptr) { + buffers[0] = buffer_view{prefix.data(), prefix.size()}; + buffers[1] = buffer_view{addr, static_cast(p - addr) + 1}; + + m += log_buffer_views(buffers); + addr = p + 1; + ++i; + } + + // original line might not end with (or include) '\n' + if(buffer[n-1] != '\n') { + buffers[0] = buffer_view{prefix.data(), prefix.size()}; + buffers[1] = buffer_view{addr, static_cast(&buffer[n] - addr)}; + buffers[2] = buffer_view{"\n", 1}; + + m += log_buffer_views(buffers); + } + + return m; + } + + template + static inline void + log_message(std::FILE* fp, Args&&... args) { + log_message(::fileno(fp), std::forward(args)...); + } + + template + static inline void + log_message(int fd, Args&&... args) { + + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + + static_buffer buffer; + fmt::format_to(buffer, std::forward(args)...); + fmt::format_to(buffer, "\n"); + detail::log_buffer(fd, buffer); + } + + void + log_syscall(syscall::info info, + const long syscall_number, + const long args[6], + boost::optional result = boost::none); + + static std::shared_ptr& global_logger() { + static std::shared_ptr s_global_logger; + return s_global_logger; + } + + int log_fd_; + log_level log_mask_; + +#ifdef GKFS_DEBUG_BUILD + std::bitset<512> filtered_syscalls_; +#endif + + const date::time_zone * const timezone_; +}; + +// the following static functions can be used to interact +// with a globally registered logger instance + +template +static inline void +create_global_logger(Args&&... 
args) { + + auto foo = std::make_shared(std::forward(args)...); + logger::global_logger() = foo; + +} + +static inline void +register_global_logger(logger&& lg) { + logger::global_logger() = std::make_shared(std::move(lg)); +} + +static inline std::shared_ptr& +get_global_logger() { + return logger::global_logger(); +} + +static inline void +destroy_global_logger() { + logger::global_logger().reset(); +} + +inline void +static_buffer::grow(std::size_t size) { + + const auto logger = get_global_logger(); + + if(logger) { + logger->log_mask_ &= ~(syscall | syscall_at_entry); + } + + std::fprintf(stderr, +"FATAL: message too long for gkfs::log::static_buffer, increase the size of\n" +"LIBGKFS_LOG_MESSAGE_SIZE in CMake or reduce the length of the offending " +"message.\n"); + abort(); +} + +} // namespace log +} // namespace gkfs + +#define LOG(XXX, ...) LOG_##XXX(__VA_ARGS__) + +#ifdef GKFS_DISABLE_LOGGING + +#define LOG_INFO(...) do {} while(0); +#define LOG_WARNING(...) do {} while(0); +#define LOG_ERROR(...) do {} while(0); +#define LOG_CRITICAL(...) do {} while(0); +#define LOG_SYSCALL(...) do {} while(0); +#define LOG_DEBUG(...) do {} while(0); + +#else // !GKFS_DISABLE_LOGGING + +#define LOG_INFO(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::info, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#define LOG_WARNING(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::warning, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#define LOG_ERROR(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::error, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#define LOG_CRITICAL(...) 
do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::critical, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#ifdef GKFS_DEBUG_BUILD + +#define LOG_SYSCALL(...) do { \ +if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log_syscall(__VA_ARGS__); \ + } \ +} while(0); + +#define LOG_DEBUG(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::debug, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#else // ! GKFS_DEBUG_BUILD + +#define LOG_SYSCALL(...) do {} while(0); +#define LOG_DEBUG(...) do {} while(0); + +#endif // ! GKFS_DEBUG_BUILD +#endif // !GKFS_DISABLE_LOGGING + +#endif // LIBGKFS_LOGGING_HPP diff --git a/include/client/make_array.hpp b/include/client/make_array.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c1a84e0a2a68f2b5b5657f416e0eeb594fefb64 --- /dev/null +++ b/include/client/make_array.hpp @@ -0,0 +1,35 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef LIBGKFS_UTILS_MAKE_ARRAY_HPP +#define LIBGKFS_UTILS_MAKE_ARRAY_HPP + +namespace gkfs { +namespace utils { + +template +constexpr auto make_array(T&&... 
values) -> + std::array< + typename std::decay< + typename std::common_type::type>::type, + sizeof...(T)> { + return std::array< + typename std::decay< + typename std::common_type::type>::type, + sizeof...(T)>{std::forward(values)...}; +} + +} // namespace utils +} // namespace gkfs + +#endif // LIBGKFS_UTILS_MAKE_ARRAY_HPP diff --git a/include/client/open_file_map.hpp b/include/client/open_file_map.hpp index b7c4b1265a4e4a165ba82db29f1d60371b1305ff..aeb188833c0b826c2ceaf5c0dfcb98f190f8b961 100644 --- a/include/client/open_file_map.hpp +++ b/include/client/open_file_map.hpp @@ -44,7 +44,7 @@ class OpenFile { protected: FileType type_; std::string path_; - std::array(OpenFile_flags::flag_count)> flags_ = {false}; + std::array(OpenFile_flags::flag_count)> flags_ = {{false}}; unsigned long pos_; std::mutex pos_mutex_; std::mutex flag_mutex_; diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index cb42c9227d29c76a42b9b9c3bc2dc53621068754..3a4d288a3171312c633ac21d35e7368100d5cc3f 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -14,17 +14,23 @@ #ifndef IFS_PRELOAD_CTX_HPP #define IFS_PRELOAD_CTX_HPP -#include +#include #include #include #include #include #include +#include + /* Forward declarations */ class OpenFileMap; class Distributor; +namespace gkfs { namespace log { + struct logger; +}} + struct FsConfig { // configurable metadata @@ -49,10 +55,13 @@ enum class RelativizeStatus { }; class PreloadContext { + + static auto constexpr MIN_INTERNAL_FD = MAX_OPEN_FDS - MAX_INTERNAL_FDS; + static auto constexpr MAX_USER_FDS = MIN_INTERNAL_FD; + private: PreloadContext(); - std::shared_ptr log_; std::shared_ptr ofm_; std::shared_ptr distributor_; std::shared_ptr fs_conf_; @@ -61,11 +70,16 @@ class PreloadContext { std::vector mountdir_components_; std::string mountdir_; - std::vector hosts_; + std::vector hosts_; uint64_t local_host_id_; bool interception_enabled_; + std::bitset internal_fds_; + 
mutable std::mutex internal_fds_mutex_; + bool internal_fds_must_relocate_; + std::bitset protected_fds_; + public: static PreloadContext* getInstance() { static PreloadContext instance; @@ -75,9 +89,7 @@ class PreloadContext { PreloadContext(PreloadContext const&) = delete; void operator=(PreloadContext const&) = delete; - void log(std::shared_ptr logger); - std::shared_ptr log() const; - + void init_logging(); void mountdir(const std::string& path); const std::string& mountdir() const; const std::vector& mountdir_components() const; @@ -85,8 +97,10 @@ class PreloadContext { void cwd(const std::string& path); const std::string& cwd() const; - const std::vector& hosts() const; - void hosts(const std::vector& addrs); + const std::vector& hosts() const; + void hosts(const std::vector& addrs); + void clear_hosts(); + uint64_t local_host_id() const; void local_host_id(uint64_t id); @@ -106,6 +120,13 @@ class PreloadContext { void enable_interception(); void disable_interception(); bool interception_enabled() const; + + int register_internal_fd(int fd); + void unregister_internal_fd(int fd); + bool is_internal_fd(int fd) const; + + void protect_user_fds(); + void unprotect_user_fds(); }; diff --git a/include/client/preload_util.hpp b/include/client/preload_util.hpp index 99fde438eb43c9896bab2ed03fb8122294f96609..4879aec76f709227b72774f5cca3f6742e187057 100644 --- a/include/client/preload_util.hpp +++ b/include/client/preload_util.hpp @@ -22,10 +22,6 @@ #include #include -extern "C" { -#include -} - struct MetadentryUpdateFlags { bool atime = false; bool mtime = false; @@ -39,8 +35,10 @@ struct MetadentryUpdateFlags { bool path = false; }; -// Margo instances -extern margo_instance_id ld_margo_rpc_id; +// Hermes instance +namespace hermes { class async_engine; } +extern std::unique_ptr ld_network_service; + // RPC IDs extern hg_id_t rpc_config_id; extern hg_id_t rpc_mk_node_id; @@ -72,13 +70,4 @@ hg_addr_t get_local_addr(); void load_hosts(); bool lookup_all_hosts(); 
-void cleanup_addresses(); - -hg_return margo_create_wrap_helper(const hg_id_t rpc_id, uint64_t recipient, - hg_handle_t& handle); - -hg_return margo_create_wrap(const hg_id_t rpc_id, const std::string&, - hg_handle_t& handle); - - #endif //IFS_PRELOAD_UTIL_HPP diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp new file mode 100644 index 0000000000000000000000000000000000000000..240a82433c6e2999aa6c21435f998c6a6034e6ba --- /dev/null +++ b/include/client/rpc/hg_rpcs.hpp @@ -0,0 +1,2044 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_RPCS_HPP +#define GKFS_RPCS_HPP + +// C includes +#include +#include +#include + +// C++ includes +#include + +// hermes includes +#include + +#ifndef HG_GEN_PROC_NAME +#define HG_GEN_PROC_NAME(struct_type_name) \ + hg_proc_ ## struct_type_name +#endif + + +#include +#include + +namespace hermes { namespace detail { + +struct hg_void_t { }; + +static HG_INLINE hg_return_t +hg_proc_void_t(hg_proc_t proc, void *data) +{ + (void) proc; + (void) data; + + return HG_SUCCESS; +} + +}} // namespace hermes::detail + +namespace gkfs { +namespace rpc { + +//============================================================================== +// definitions for fs_config +struct fs_config { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = fs_config; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = hermes::detail::hg_void_t; + using 
mercury_output_type = rpc_config_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 3033006080; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::fs_config; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + hermes::detail::hg_proc_void_t; + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_config_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input() { } + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + explicit + input(const hermes::detail::hg_void_t& other) { } + + explicit + operator hermes::detail::hg_void_t() { + return {}; + } + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_mountdir(), + m_rootdir(), + m_atime_state(), + m_mtime_state(), + m_ctime_state(), + m_link_cnt_state(), + m_blocks_state(), + m_uid(), + m_gid() {} + + output(const std::string& mountdir, + const std::string& rootdir, + bool atime_state, + bool mtime_state, + bool ctime_state, + bool link_cnt_state, + bool blocks_state, + uint32_t uid, + uint32_t gid) : + m_mountdir(mountdir), + m_rootdir(rootdir), + m_atime_state(atime_state), + m_mtime_state(mtime_state), + m_ctime_state(ctime_state), + m_link_cnt_state(link_cnt_state), + m_blocks_state(blocks_state), + m_uid(uid), + m_gid(gid) {} + + output(output&& rhs) = default; + output(const output& other) = default; + 
output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_config_out_t& out) { + + if(out.mountdir != nullptr) { + m_mountdir = out.mountdir; + } + + if(out.rootdir != nullptr) { + m_rootdir = out.rootdir; + } + + m_atime_state = out.atime_state; + m_mtime_state = out.mtime_state; + m_ctime_state = out.ctime_state; + m_link_cnt_state = out.link_cnt_state; + m_blocks_state = out.blocks_state; + m_uid = out.uid; + m_gid = out.gid; + } + + std::string + mountdir() const { + return m_mountdir; + } + + std::string + rootdir() const { + return m_rootdir; + } + + bool + atime_state() const { + return m_atime_state; + } + + bool + mtime_state() const { + return m_mtime_state; + } + + bool + ctime_state() const { + return m_ctime_state; + } + + bool + link_cnt_state() const { + return m_link_cnt_state; + } + + bool + blocks_state() const { + return m_blocks_state; + } + + uint32_t + uid() const { + return m_uid; + } + + uint32_t + gid() const { + return m_gid; + } + + private: + std::string m_mountdir; + std::string m_rootdir; + bool m_atime_state; + bool m_mtime_state; + bool m_ctime_state; + bool m_link_cnt_state; + bool m_blocks_state; + uint32_t m_uid; + uint32_t m_gid; + }; +}; + + +//============================================================================== +// definitions for create +struct create { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = create; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_mk_node_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 796590080; + + // RPC internal Mercury 
identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::create; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_mk_node_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint32_t mode) : + m_path(path), + m_mode(mode) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint32_t + mode() const { + return m_mode; + } + + explicit + input(const rpc_mk_node_in_t& other) : + m_path(other.path), + m_mode(other.mode) { } + + explicit + operator rpc_mk_node_in_t() { + return {m_path.c_str(), m_mode}; + } + + private: + std::string m_path; + uint32_t m_mode; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +//============================================================================== +// definitions for stat +struct stat { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the 
engine knows what to do with the RPC + using self_type = stat; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_path_only_in_t; + using mercury_output_type = rpc_stat_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 1396244480; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::stat; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_path_only_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_stat_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path ) : + m_path(path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit + input(const rpc_path_only_in_t& other) : + m_path(other.path) { } + + explicit + operator rpc_path_only_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_db_val() {} + + output(int32_t err, const std::string& db_val) : + m_err(err), + m_db_val(db_val) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit 
+ output(const rpc_stat_out_t& out) { + m_err = out.err; + + if(out.db_val != nullptr) { + m_db_val = out.db_val; + } + } + + int32_t + err() const { + return m_err; + } + + std::string + db_val() const { + return m_db_val; + } + + private: + int32_t m_err; + std::string m_db_val; + }; +}; + +//============================================================================== +// definitions for remove +struct remove { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = remove; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_rm_node_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 2549415936; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::remove; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_rm_node_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path) : + m_path(path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit + input(const rpc_rm_node_in_t& other) : + m_path(other.path) { } + + explicit + operator rpc_rm_node_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +//============================================================================== +// definitions for decr_size +struct decr_size { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = decr_size; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_trunc_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + 
// (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 1291649024; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::decr_size; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_trunc_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, uint64_t length) : + m_path(path), + m_length(length) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + length() const { + return m_length; + } + + explicit + input(const rpc_trunc_in_t& other) : + m_path(other.path), + m_length(other.length) { } + + explicit + operator rpc_trunc_in_t() { + return {m_path.c_str(), m_length}; + } + + private: + std::string m_path; + uint64_t m_length; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + 
+//============================================================================== +// definitions for update_metadentry +struct update_metadentry { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = update_metadentry; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_update_metadentry_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 99483648; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::update_metadentry; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint64_t nlink, + uint32_t mode, + uint32_t uid, + uint32_t gid, + int64_t size, + int64_t blocks, + int64_t atime, + int64_t mtime, + int64_t ctime, + bool nlink_flag, + bool mode_flag, + bool size_flag, + bool block_flag, + bool atime_flag, + bool mtime_flag, + bool ctime_flag) : + m_path(path), + m_nlink(nlink), + m_mode(mode), + m_uid(uid), + m_gid(gid), + m_size(size), + m_blocks(blocks), + m_atime(atime), + m_mtime(mtime), + m_ctime(ctime), + m_nlink_flag(nlink_flag), + m_mode_flag(mode_flag), + m_size_flag(size_flag), + 
m_block_flag(block_flag), + m_atime_flag(atime_flag), + m_mtime_flag(mtime_flag), + m_ctime_flag(ctime_flag) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + nlink() const { + return m_nlink; + } + + uint32_t + mode() const { + return m_mode; + } + + uint32_t + uid() const { + return m_uid; + } + + uint32_t + gid() const { + return m_gid; + } + + int64_t + size() const { + return m_size; + } + + int64_t + blocks() const { + return m_blocks; + } + + int64_t + atime() const { + return m_atime; + } + + int64_t + mtime() const { + return m_mtime; + } + + int64_t + ctime() const { + return m_ctime; + } + + bool + nlink_flag() const { + return m_nlink_flag; + } + + bool + mode_flag() const { + return m_mode_flag; + } + + bool + size_flag() const { + return m_size_flag; + } + + bool + block_flag() const { + return m_block_flag; + } + + bool + atime_flag() const { + return m_atime_flag; + } + + bool + mtime_flag() const { + return m_mtime_flag; + } + + bool + ctime_flag() const { + return m_ctime_flag; + } + + explicit + input(const rpc_update_metadentry_in_t& other) : + m_path(other.path), + m_nlink(other.nlink), + m_mode(other.mode), + m_uid(other.uid), + m_gid(other.gid), + m_size(other.size), + m_blocks(other.blocks), + m_atime(other.atime), + m_mtime(other.mtime), + m_ctime(other.ctime), + m_nlink_flag(other.nlink_flag), + m_mode_flag(other.mode_flag), + m_size_flag(other.size_flag), + m_block_flag(other.block_flag), + m_atime_flag(other.atime_flag), + m_mtime_flag(other.mtime_flag), + m_ctime_flag(other.ctime_flag) { } + + explicit + operator rpc_update_metadentry_in_t() { + return {m_path.c_str(), + m_nlink, + m_mode, + m_uid, + m_gid, + m_size, + m_blocks, + m_atime, + m_mtime, + m_ctime, + m_nlink_flag, + m_mode_flag, + m_size_flag, + m_block_flag, + m_atime_flag, + 
m_mtime_flag, + m_ctime_flag}; + } + + private: + std::string m_path; + uint64_t m_nlink; + uint32_t m_mode; + uint32_t m_uid; + uint32_t m_gid; + int64_t m_size; + int64_t m_blocks; + int64_t m_atime; + int64_t m_mtime; + int64_t m_ctime; + bool m_nlink_flag; + bool m_mode_flag; + bool m_size_flag; + bool m_block_flag; + bool m_atime_flag; + bool m_mtime_flag; + bool m_ctime_flag; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +//============================================================================== +// definitions for get_metadentry_size +struct get_metadentry_size { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = get_metadentry_size; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_path_only_in_t; + using mercury_output_type = rpc_get_metadentry_size_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 3426484224; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::get_metadentry_size; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_path_only_in_t); + + // Mercury callback to serialize output arguments (must match mercury_output_type, otherwise ret_size is never decoded) + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_get_metadentry_size_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path ) : + m_path(path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit + input(const rpc_path_only_in_t& other) : + m_path(other.path) { } + + explicit + operator rpc_path_only_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_ret_size() {} + + output(int32_t err, int64_t ret_size) : + m_err(err), + m_ret_size(ret_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_get_metadentry_size_out_t& out) { + m_err = out.err; + m_ret_size = out.ret_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + ret_size() const { + return m_ret_size; + } + + private: + int32_t m_err; + int64_t m_ret_size; + }; +}; + +//============================================================================== +// definitions for update_metadentry_size +struct update_metadentry_size { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = 
update_metadentry_size; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_update_metadentry_size_in_t; + using mercury_output_type = rpc_update_metadentry_size_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 2760900608; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::update_metadentry_size; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_size_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_size_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint64_t size, + int64_t offset, + bool append) : + m_path(path), + m_size(size), + m_offset(offset), + m_append(append) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + size() const { + return m_size; + } + + int64_t + offset() const { + return m_offset; + } + + bool + append() const { + return m_append; + } + + explicit + input(const rpc_update_metadentry_size_in_t& other) : + m_path(other.path), + m_size(other.size), + m_offset(other.offset), + m_append(other.append) { } + + explicit + operator rpc_update_metadentry_size_in_t() { + return {m_path.c_str(), m_size, m_offset, m_append}; + } + + private: + std::string m_path; + 
uint64_t m_size; + int64_t m_offset; + bool m_append; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_ret_size() {} + + output(int32_t err, int64_t ret_size) : + m_err(err), + m_ret_size(ret_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_update_metadentry_size_out_t& out) { + m_err = out.err; + m_ret_size = out.ret_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + ret_size() const { + return m_ret_size; + } + + private: + int32_t m_err; + int64_t m_ret_size; + }; +}; + +#ifdef HAS_SYMLINKS + +//============================================================================== +// definitions for mk_symlink +struct mk_symlink { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = mk_symlink; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_mk_symlink_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 3207004160; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::mk_symlink; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_mk_symlink_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + const std::string& target_path) : + m_path(path), + m_target_path(target_path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + std::string + target_path() const { + return m_target_path; + } + + explicit + input(const rpc_mk_symlink_in_t& other) : + m_path(other.path), + m_target_path(other.target_path) { } + + explicit + operator rpc_mk_symlink_in_t() { + return {m_path.c_str(), m_target_path.c_str()}; + } + + private: + std::string m_path; + std::string m_target_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +#endif // HAS_SYMLINKS + +//============================================================================== +// definitions for write_data +struct write_data { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the 
RPC + using self_type = write_data; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_write_data_in_t; + using mercury_output_type = rpc_data_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 3716481024; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::write_data; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_write_data_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_data_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + int64_t offset, + uint64_t host_id, + uint64_t host_size, + uint64_t chunk_n, + uint64_t chunk_start, + uint64_t chunk_end, + uint64_t total_chunk_size, + const hermes::exposed_memory& buffers) : + m_path(path), + m_offset(offset), + m_host_id(host_id), + m_host_size(host_size), + m_chunk_n(chunk_n), + m_chunk_start(chunk_start), + m_chunk_end(chunk_end), + m_total_chunk_size(total_chunk_size), + m_buffers(buffers) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int64_t + offset() const { + return m_offset; + } + + uint64_t + host_id() const { + return m_host_id; + } + + uint64_t + host_size() const { + return m_host_size; + } + + uint64_t + chunk_n() const { + return m_chunk_n; + } + + uint64_t + 
chunk_start() const { + return m_chunk_start; + } + + uint64_t + chunk_end() const { + return m_chunk_end; + } + + uint64_t + total_chunk_size() const { + return m_total_chunk_size; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit + input(const rpc_write_data_in_t& other) : + m_path(other.path), + m_offset(other.offset), + m_host_id(other.host_id), + m_host_size(other.host_size), + m_chunk_n(other.chunk_n), + m_chunk_start(other.chunk_start), + m_chunk_end(other.chunk_end), + m_total_chunk_size(other.total_chunk_size), + m_buffers(other.bulk_handle) { } + + explicit + operator rpc_write_data_in_t() { + return { + m_path.c_str(), + m_offset, + m_host_id, + m_host_size, + m_chunk_n, + m_chunk_start, + m_chunk_end, + m_total_chunk_size, + hg_bulk_t(m_buffers) + }; + } + + private: + std::string m_path; + int64_t m_offset; + uint64_t m_host_id; + uint64_t m_host_size; + uint64_t m_chunk_n; + uint64_t m_chunk_start; + uint64_t m_chunk_end; + uint64_t m_total_chunk_size; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_io_size() {} + + output(int32_t err, size_t io_size) : + m_err(err), + m_io_size(io_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_data_out_t& out) { + m_err = out.err; + m_io_size = out.io_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + io_size() const { + return m_io_size; + } + + private: + int32_t m_err; + size_t m_io_size; + }; +}; + +//============================================================================== +// definitions for read_data +struct read_data { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that 
the engine knows what to do with the RPC + using self_type = read_data; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_read_data_in_t; + using mercury_output_type = rpc_data_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 892207104; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::read_data; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_read_data_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_data_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + int64_t offset, + uint64_t host_id, + uint64_t host_size, + uint64_t chunk_n, + uint64_t chunk_start, + uint64_t chunk_end, + uint64_t total_chunk_size, + const hermes::exposed_memory& buffers) : + m_path(path), + m_offset(offset), + m_host_id(host_id), + m_host_size(host_size), + m_chunk_n(chunk_n), + m_chunk_start(chunk_start), + m_chunk_end(chunk_end), + m_total_chunk_size(total_chunk_size), + m_buffers(buffers) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int64_t + offset() const { + return m_offset; + } + + uint64_t + host_id() const { + return m_host_id; + } + + uint64_t + host_size() const { + return m_host_size; + } + + uint64_t + chunk_n() const { + return 
m_chunk_n; + } + + uint64_t + chunk_start() const { + return m_chunk_start; + } + + uint64_t + chunk_end() const { + return m_chunk_end; + } + + uint64_t + total_chunk_size() const { + return m_total_chunk_size; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit + input(const rpc_read_data_in_t& other) : + m_path(other.path), + m_offset(other.offset), + m_host_id(other.host_id), + m_host_size(other.host_size), + m_chunk_n(other.chunk_n), + m_chunk_start(other.chunk_start), + m_chunk_end(other.chunk_end), + m_total_chunk_size(other.total_chunk_size), + m_buffers(other.bulk_handle) { } + + explicit + operator rpc_read_data_in_t() { + return { + m_path.c_str(), + m_offset, + m_host_id, + m_host_size, + m_chunk_n, + m_chunk_start, + m_chunk_end, + m_total_chunk_size, + hg_bulk_t(m_buffers) + }; + } + + private: + std::string m_path; + int64_t m_offset; + uint64_t m_host_id; + uint64_t m_host_size; + uint64_t m_chunk_n; + uint64_t m_chunk_start; + uint64_t m_chunk_end; + uint64_t m_total_chunk_size; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_io_size() {} + + output(int32_t err, size_t io_size) : + m_err(err), + m_io_size(io_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_data_out_t& out) { + m_err = out.err; + m_io_size = out.io_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + io_size() const { + return m_io_size; + } + + private: + int32_t m_err; + size_t m_io_size; + }; +}; + +//============================================================================== +// definitions for trunc_data +struct trunc_data { + + // forward declarations of public input/output types for this RPC + class input; + class 
output; + + // traits used so that the engine knows what to do with the RPC + using self_type = trunc_data; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_trunc_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 1850933248; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::trunc_data; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_trunc_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint64_t length) : + m_path(path), + m_length(length) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + length() const { + return m_length; + } + + explicit + input(const rpc_trunc_in_t& other) : + m_path(other.path), + m_length(other.length) { } + + explicit + operator rpc_trunc_in_t() { + return { + m_path.c_str(), + m_length, + }; + } + + private: + std::string m_path; + uint64_t m_length; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const 
output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +//============================================================================== +// definitions for get_dirents +struct get_dirents { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = get_dirents; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_get_dirents_in_t; + using mercury_output_type = rpc_get_dirents_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 4121034752; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::get_dirents; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_get_dirents_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_get_dirents_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + const hermes::exposed_memory& buffers) : + m_path(path), + m_buffers(buffers) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit + input(const rpc_get_dirents_in_t& other) : + m_path(other.path), + m_buffers(other.bulk_handle) { } + + explicit + operator rpc_get_dirents_in_t() { + return { + m_path.c_str(), + hg_bulk_t(m_buffers) + }; + } + + private: + std::string m_path; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_dirents_size() {} + + output(int32_t err, size_t dirents_size) : + m_err(err), + m_dirents_size(dirents_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_get_dirents_out_t& out) { + m_err = out.err; + m_dirents_size = out.dirents_size; + } + + int32_t + err() const { + return m_err; + } + + size_t + dirents_size() const { + return m_dirents_size; + } + + private: + int32_t m_err; + size_t m_dirents_size; + }; +}; + +//============================================================================== +// 
definitions for chunk_stat +struct chunk_stat { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = chunk_stat; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_chunk_stat_in_t; + using mercury_output_type = rpc_chunk_stat_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 532742144; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::chunk_stat; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_chunk_stat_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_chunk_stat_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(int32_t dummy) : + m_dummy(dummy) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + int32_t + dummy() const { + return m_dummy; + } + + explicit + input(const rpc_chunk_stat_in_t& other) : + m_dummy(other.dummy) { } + + explicit + operator rpc_chunk_stat_in_t() { + return { m_dummy }; + } + + private: + int32_t m_dummy; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_chunk_size(), + m_chunk_total(), + m_chunk_free() {} + + output(uint64_t chunk_size, uint64_t chunk_total, 
uint64_t chunk_free) : + m_chunk_size(chunk_size), + m_chunk_total(chunk_total), + m_chunk_free(chunk_free) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_chunk_stat_out_t& out) { + m_chunk_size = out.chunk_size; + m_chunk_total = out.chunk_total; + m_chunk_free = out.chunk_free; + } + + uint64_t + chunk_size() const { + return m_chunk_size; + } + + uint64_t + chunk_total() const { + return m_chunk_total; + } + + uint64_t + chunk_free() const { + return m_chunk_free; + } + + private: + uint64_t m_chunk_size; + uint64_t m_chunk_total; + uint64_t m_chunk_free; + }; +}; + +} // namespace rpc +} // namespace gkfs + + +#endif // GKFS_RPCS_HPP diff --git a/include/client/rpc/ld_rpc_metadentry.hpp b/include/client/rpc/ld_rpc_metadentry.hpp index 1abb330869da873fd4bfe4f96a1fb396ef830305..fe260a5ea9e799195c28b735316eb53bdc885e9e 100644 --- a/include/client/rpc/ld_rpc_metadentry.hpp +++ b/include/client/rpc/ld_rpc_metadentry.hpp @@ -29,7 +29,7 @@ int mk_node(const std::string& path, mode_t mode); int stat(const std::string& path, std::string& attr); -int rm_node(const std::string& path, const bool remove_metadentry_only); +int rm_node(const std::string& path, const bool remove_metadentry_only, const ssize_t size); int decr_size(const std::string& path, size_t length); diff --git a/include/client/syscall_names.hpp b/include/client/syscall_names.hpp deleted file mode 100644 index 823d859a58ac9e28825908d502e826b3f8b6cd34..0000000000000000000000000000000000000000 --- a/include/client/syscall_names.hpp +++ /dev/null @@ -1,353 +0,0 @@ -/* - Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany - - This software was partially supported by the - EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
- - This software was partially supported by the - ADA-FS project under the SPPEXA project funded by the DFG. - - SPDX-License-Identifier: MIT -*/ - -#ifndef IFS_SYSCALL_NAMES_HPP -#define IFS_SYSCALL_NAMES_HPP - -const char* syscall_names[] = { -"read", -"write", -"open", -"close", -"stat", -"fstat", -"lstat", -"poll", -"lseek", -"mmap", -"mprotect", -"munmap", -"brk", -"rt_sigaction", -"rt_sigprocmask", -"rt_sigreturn", -"ioctl", -"pread64", -"pwrite64", -"readv", -"writev", -"access", -"pipe", -"select", -"sched_yield", -"mremap", -"msync", -"mincore", -"madvise", -"shmget", -"shmat", -"shmctl", -"dup", -"dup2", -"pause", -"nanosleep", -"getitimer", -"alarm", -"setitimer", -"getpid", -"sendfile", -"socket", -"connect", -"accept", -"sendto", -"recvfrom", -"sendmsg", -"recvmsg", -"shutdown", -"bind", -"listen", -"getsockname", -"getpeername", -"socketpair", -"setsockopt", -"getsockopt", -"clone", -"fork", -"vfork", -"execve", -"exit", -"wait4", -"kill", -"uname", -"semget", -"semop", -"semctl", -"shmdt", -"msgget", -"msgsnd", -"msgrcv", -"msgctl", -"fcntl", -"flock", -"fsync", -"fdatasync", -"truncate", -"ftruncate", -"getdents", -"getcwd", -"chdir", -"fchdir", -"rename", -"mkdir", -"rmdir", -"creat", -"link", -"unlink", -"symlink", -"readlink", -"chmod", -"fchmod", -"chown", -"fchown", -"lchown", -"umask", -"gettimeofday", -"getrlimit", -"getrusage", -"sysinfo", -"times", -"ptrace", -"getuid", -"syslog", -"getgid", -"setuid", -"setgid", -"geteuid", -"getegid", -"setpgid", -"getppid", -"getpgrp", -"setsid", -"setreuid", -"setregid", -"getgroups", -"setgroups", -"setresuid", -"getresuid", -"setresgid", -"getresgid", -"getpgid", -"setfsuid", -"setfsgid", -"getsid", -"capget", -"capset", -"rt_sigpending", -"rt_sigtimedwait", -"rt_sigqueueinfo", -"rt_sigsuspend", -"sigaltstack", -"utime", -"mknod", -"uselib", -"personality", -"ustat", -"statfs", -"fstatfs", -"sysfs", -"getpriority", -"setpriority", -"sched_setparam", -"sched_getparam", -"sched_setscheduler", 
-"sched_getscheduler", -"sched_get_priority_max", -"sched_get_priority_min", -"sched_rr_get_interval", -"mlock", -"munlock", -"mlockall", -"munlockall", -"vhangup", -"modify_ldt", -"pivot_root", -"_sysctl", -"prctl", -"arch_prctl", -"adjtimex", -"setrlimit", -"chroot", -"sync", -"acct", -"settimeofday", -"mount", -"umount2", -"swapon", -"swapoff", -"reboot", -"sethostname", -"setdomainname", -"iopl", -"ioperm", -"create_module", -"init_module", -"delete_module", -"get_kernel_syms", -"query_module", -"quotactl", -"nfsservctl", -"getpmsg", -"putpmsg", -"afs_syscall", -"tuxcall", -"security", -"gettid", -"readahead", -"setxattr", -"lsetxattr", -"fsetxattr", -"getxattr", -"lgetxattr", -"fgetxattr", -"listxattr", -"llistxattr", -"flistxattr", -"removexattr", -"lremovexattr", -"fremovexattr", -"tkill", -"time", -"futex", -"sched_setaffinity", -"sched_getaffinity", -"set_thread_area", -"io_setup", -"io_destroy", -"io_getevents", -"io_submit", -"io_cancel", -"get_thread_area", -"lookup_dcookie", -"epoll_create", -"epoll_ctl_old", -"epoll_wait_old", -"remap_file_pages", -"getdents64", -"set_tid_address", -"restart_syscall", -"semtimedop", -"fadvise64", -"timer_create", -"timer_settime", -"timer_gettime", -"timer_getoverrun", -"timer_delete", -"clock_settime", -"clock_gettime", -"clock_getres", -"clock_nanosleep", -"exit_group", -"epoll_wait", -"epoll_ctl", -"tgkill", -"utimes", -"vserver", -"mbind", -"set_mempolicy", -"get_mempolicy", -"mq_open", -"mq_unlink", -"mq_timedsend", -"mq_timedreceive", -"mq_notify", -"mq_getsetattr", -"kexec_load", -"waitid", -"add_key", -"request_key", -"keyctl", -"ioprio_set", -"ioprio_get", -"inotify_init", -"inotify_add_watch", -"inotify_rm_watch", -"migrate_pages", -"openat", -"mkdirat", -"mknodat", -"fchownat", -"futimesat", -"newfstatat", -"unlinkat", -"renameat", -"linkat", -"symlinkat", -"readlinkat", -"fchmodat", -"faccessat", -"pselect6", -"ppoll", -"unshare", -"set_robust_list", -"get_robust_list", -"splice", -"tee", 
-"sync_file_range", -"vmsplice", -"move_pages", -"utimensat", -"epoll_pwait", -"signalfd", -"timerfd_create", -"eventfd", -"fallocate", -"timerfd_settime", -"timerfd_gettime", -"accept4", -"signalfd4", -"eventfd2", -"epoll_create1", -"dup3", -"pipe2", -"inotify_init1", -"preadv", -"pwritev", -"rt_tgsigqueueinfo", -"perf_event_open", -"recvmmsg", -"fanotify_init", -"fanotify_mark", -"prlimit64", -"name_to_handle_at", -"open_by_handle_at", -"clock_adjtime", -"syncfs", -"sendmmsg", -"setns", -"getcpu", -"process_vm_readv", -"process_vm_writev", -"kcmp", -"finit_module", -"sched_setattr", -"sched_getattr", -"renameat2", -"seccomp", -"getrandom", -"memfd_create", -"kexec_file_load", -"bpf", -"execveat", -"userfaultfd", -"membarrier", -"mlock2", -"copy_file_range", -"preadv2", -"pwritev2", -"pkey_mprotect", -"pkey_alloc", -"pkey_free", -"statx" -}; - -#endif diff --git a/include/client/syscalls.hpp b/include/client/syscalls.hpp new file mode 100644 index 0000000000000000000000000000000000000000..74067a4896270fdca32f9745487968114814a55c --- /dev/null +++ b/include/client/syscalls.hpp @@ -0,0 +1,21 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef LIBGKFS_SYSCALLS_HPP +#define LIBGKFS_SYSCALLS_HPP + +#include +#include +#include + +#endif // LIBGKFS_SYSCALLS_HPP diff --git a/include/client/syscalls/args.hpp b/include/client/syscalls/args.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0677ea54febf54b4160a339e25de56ddf17dd4cf --- /dev/null +++ b/include/client/syscalls/args.hpp @@ -0,0 +1,724 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALLS_ARGS_HPP +#define GKFS_SYSCALLS_ARGS_HPP + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace gkfs { +namespace syscall { +namespace arg { + +/** Allowed arg types (based on the values of the corresponding C enum) */ +enum class type { + none = ::arg_type_t::none, + fd = ::arg_type_t::fd, + atfd = ::arg_type_t::atfd, + cstr = ::arg_type_t::cstr, + open_flags = ::arg_type_t::open_flags, + octal_mode = ::arg_type_t::octal_mode, + ptr = ::arg_type_t::ptr, + dec = ::arg_type_t::dec, + dec32 = ::arg_type_t::dec32, + offset = ::arg_type_t::offset, + whence = ::arg_type_t::whence, + mmap_prot = ::arg_type_t::mmap_prot, + mmap_flags = ::arg_type_t::mmap_flags, + clone_flags = ::arg_type_t::clone_flags, + signum = ::arg_type_t::signum, + sigproc_how = ::arg_type_t::sigproc_how, + generic = ::arg_type_t::arg, +}; + +/* Some constant definitions for convenience */ +static constexpr auto none = type::none; +static constexpr auto fd = type::fd; +static constexpr auto atfd = type::atfd; +static constexpr auto cstr = type::cstr; +static constexpr auto open_flags = 
type::open_flags; +static constexpr auto octal_mode = type::octal_mode; +static constexpr auto ptr = type::ptr; +static constexpr auto dec = type::dec; +static constexpr auto dec32 = type::dec32; +static constexpr auto offset = type::offset; +static constexpr auto whence = type::whence; +static constexpr auto mmap_prot = type::mmap_prot; +static constexpr auto mmap_flags = type::mmap_flags; +static constexpr auto clone_flags = type::clone_flags; +static constexpr auto signum = type::signum; +static constexpr auto sigproc_how = type::sigproc_how; +static constexpr auto generic = type::generic; + + +/** An argument value with an optional size */ +struct printable_arg { + const char * const name; + const long value; + boost::optional size; +}; + + +/** All arg formatters must follow this prototype */ +template +using formatter = + std::add_pointer_t; + + + +/** forward declare formatters */ +template inline void +format_none_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_fd_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_atfd_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_cstr_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_open_flags_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_octal_mode_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_ptr_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_dec_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_dec32_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_whence_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_mmap_prot_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_mmap_flags_arg_to(FmtBuffer& buffer, const printable_arg& 
parg); + +template inline void +format_clone_flags_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_signum_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_sigproc_how_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_arg_to(FmtBuffer& buffer, const printable_arg& parg); + + +/** Known formatters */ +template +static const constexpr +std::array, arg_type_max> formatters = { + /* [none] = */ format_none_arg_to, + /* [fd] = */ format_fd_arg_to, + /* [atfd] = */ format_atfd_arg_to, + /* [cstr] = */ format_cstr_arg_to, + /* [open_flags] = */ format_open_flags_to, + /* [octal_mode] = */ format_octal_mode_to, + /* [ptr] = */ format_ptr_arg_to, + /* [dec] = */ format_dec_arg_to, + /* [dec32] = */ format_dec32_arg_to, + /* [offset] = */ format_dec_arg_to, + /* [whence] = */ format_whence_arg_to, + /* [mmap_prot] = */ format_mmap_prot_arg_to, + /* [mmap_flags] = */ format_mmap_flags_arg_to, + /* [clone_flags] = */ format_clone_flags_arg_to, + /* [signum] = */ format_signum_arg_to, + /* [sigproc_how] = */ format_sigproc_how_arg_to, + /* [arg] = */ format_arg_to, +}; + +/** An argument descriptor */ +struct desc { + arg::type type_; + const char* name_; + + arg::type + type() const { + return type_; + } + + const char* + name() const { + return name_; + } + + template + formatter + formatter() const { + const auto idx = static_cast(type_); + + // if the type is unknown fall back to the default formatter + if(idx < 0 || idx >= static_cast(formatters.size())) { + return format_arg_to; + } + + assert(formatters.at(idx) != nullptr); + + return formatters.at(idx); + } +}; + + +/** Specific formatter implementations follow */ + +/** Flag descriptor */ +typedef struct { + long flag_; + const char * const name_; +} flag_desc; + +#define FLAG_ENTRY(f) flag_desc{ f, #f } + +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +template 
+static void
+format_flag(FmtBuffer& buffer,
+            long flag,
+            FlagDescriptorArray&& desc) {
+
+    // Appends the name of the descriptor whose value is exactly `flag`;
+    // when no descriptor matches, appends the value in hexadecimal.
+
+    // we assume that if a flag value is zero, its printable
+    // name will always be at position 0 in the array
+    // (the size check keeps an empty table from being indexed)
+    if(flag == 0 && desc.size() != 0 && desc[0].flag_ == 0) {
+        fmt::format_to(buffer, "{}", desc[0].name_);
+        return;
+    }
+
+    for(std::size_t i = 0; i < desc.size(); ++i) {
+
+        // entries without a printable name are skipped
+        if(desc[i].name_ == nullptr) {
+            continue;
+        }
+
+        if(flag == desc[i].flag_) {
+            fmt::format_to(buffer, "{}", desc[i].name_);
+            return;
+        }
+    }
+
+    fmt::format_to(buffer, "{:#x}", flag);
+}
+
+/**
+ * format_flag_set - format a value made of OR-ed flags
+ *
+ * Walks the descriptor table in order and appends, separated by '|', the
+ * name of every descriptor that shares at least one bit with the remaining
+ * value; the bits of each printed descriptor are then cleared. Bits left
+ * over at the end are appended in hexadecimal, and "0x0" is appended when
+ * nothing at all was written.
+ */
+template <typename FmtBuffer, typename FlagDescriptorArray>
+static void
+format_flag_set(FmtBuffer& buffer,
+                long flags,
+                FlagDescriptorArray&& desc) {
+
+    // we assume that if a flag value is zero, its printable
+    // name will always be at position 0 in the array
+    // (the size check keeps an empty table from being indexed)
+    if(flags == 0 && desc.size() != 0 && desc[0].flag_ == 0) {
+        fmt::format_to(buffer, "{}", desc[0].name_);
+        return;
+    }
+
+    std::size_t i = 0;
+    // remember where our output starts so that we know whether a '|'
+    // separator is needed before the next name
+    const auto buffer_start = buffer.size();
+
+    while(flags != 0 && i < desc.size()) {
+
+        if(desc[i].name_ == nullptr) {
+            ++i;
+            continue;
+        }
+
+        if((flags & desc[i].flag_) != 0) {
+            fmt::format_to(buffer, "{}{}",
+                           buffer.size() != buffer_start ? "|" : "",
+                           desc[i].name_);
+            flags &= ~desc[i].flag_;
+        }
+
+        ++i;
+    }
+
+    // bits that no descriptor accounted for
+    if(flags != 0) {
+        if(buffer.size() != buffer_start) {
+            fmt::format_to(buffer, "|");
+        }
+
+        fmt::format_to(buffer, "{:#x}", flags);
+        return;
+    }
+
+    if(buffer_start == buffer.size()) {
+        fmt::format_to(buffer, "0x0");
+    }
+}
+
+/**
+ * format_whence_arg_to - format a 'whence' argument
+ *
+ * Format a 'whence' argument from the lseek() syscall, modifying the provided
+ * buffer by appending to it a string representation of the form:
+ *    name = formatted_val
+ */
+template <typename FmtBuffer>
+inline void
+format_whence_arg_to(FmtBuffer& buffer,
+                     const printable_arg& parg) {
+
+    /* Names for lseek() whence arg */
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(SEEK_SET),
+            FLAG_ENTRY(SEEK_CUR),
+            FLAG_ENTRY(SEEK_END)
+        );
+
+    fmt::format_to(buffer, "{}=", parg.name);
+    // 'whence' is a single enumerated value, not a set of OR-able bits, so
+    // it must be matched exactly: decoding it as a bit set would print an
+    // unlisted value such as 3 as "SEEK_CUR|SEEK_END"
+    format_flag(buffer, parg.value, flag_names);
+}
+
+
+/**
+ * format_mmap_prot_arg_to - format a 'prot' argument
+ *
+ * Format a 'prot' argument (such as those passed to mmap())
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_mmap_prot_arg_to(FmtBuffer& buffer,
+                        const printable_arg& parg) {
+
+    /* Names for mmap() prot arg */
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(PROT_NONE),
+            FLAG_ENTRY(PROT_READ),
+            FLAG_ENTRY(PROT_WRITE),
+            FLAG_ENTRY(PROT_EXEC));
+
+    fmt::format_to(buffer, "{}=", parg.name);
+    format_flag_set(buffer, parg.value, flag_names);
+
+    return;
+}
+
+
+/**
+ * format_mmap_flags_arg_to - format a 'flags' argument
+ *
+ * Format a 'flags' argument (such as those passed to mmap())
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_mmap_flags_arg_to(FmtBuffer& buffer,
+                         const printable_arg& parg) {
+
+    /* Names for mmap() flags arg */
+    // NOTE(review): on Linux MAP_SHARED_VALIDATE is presumably
+    // MAP_SHARED|MAP_PRIVATE, in which case its entry can never be printed
+    // because the two entries before it clear those bits first -- confirm
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(MAP_SHARED),
+            FLAG_ENTRY(MAP_PRIVATE),
+#ifdef MAP_SHARED_VALIDATE
+            FLAG_ENTRY(MAP_SHARED_VALIDATE),
+#endif
+            FLAG_ENTRY(MAP_FIXED),
+            FLAG_ENTRY(MAP_ANONYMOUS),
+            FLAG_ENTRY(MAP_GROWSDOWN),
+            FLAG_ENTRY(MAP_DENYWRITE),
+            FLAG_ENTRY(MAP_EXECUTABLE),
+            FLAG_ENTRY(MAP_LOCKED),
+            FLAG_ENTRY(MAP_NORESERVE),
+            FLAG_ENTRY(MAP_POPULATE),
+            FLAG_ENTRY(MAP_NONBLOCK),
+            FLAG_ENTRY(MAP_STACK),
+            FLAG_ENTRY(MAP_HUGETLB)
+#ifdef MAP_SYNC
+            ,
+            FLAG_ENTRY(MAP_SYNC)
+#endif
+            );
+
+    fmt::format_to(buffer, "{}=", parg.name);
+    format_flag_set(buffer, parg.value, flag_names);
+    return;
+}
+
+/**
+ * format_clone_flags_arg_to - format a 'flags' argument
+ *
+ * Format a 'flags' argument (such as those passed to clone())
+ * and append the resulting string to the provided buffer. The CLONE_* bits
+ * are printed as a '|'-separated set, followed by the name of the
+ * termination signal when one is encoded in the low byte.
+ */
+template <typename FmtBuffer>
+inline void
+format_clone_flags_arg_to(FmtBuffer& buffer,
+                          const printable_arg& parg) {
+
+    /* Names for clone() flags arg */
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(CLONE_VM),
+            FLAG_ENTRY(CLONE_FS),
+            FLAG_ENTRY(CLONE_FILES),
+            FLAG_ENTRY(CLONE_SIGHAND),
+            FLAG_ENTRY(CLONE_PTRACE),
+            FLAG_ENTRY(CLONE_VFORK),
+            FLAG_ENTRY(CLONE_PARENT),
+            FLAG_ENTRY(CLONE_THREAD),
+            FLAG_ENTRY(CLONE_NEWNS),
+            FLAG_ENTRY(CLONE_SYSVSEM),
+            FLAG_ENTRY(CLONE_SETTLS),
+            FLAG_ENTRY(CLONE_PARENT_SETTID),
+            FLAG_ENTRY(CLONE_CHILD_CLEARTID),
+            FLAG_ENTRY(CLONE_DETACHED),
+            FLAG_ENTRY(CLONE_UNTRACED),
+            FLAG_ENTRY(CLONE_CHILD_SETTID),
+#ifdef CLONE_NEWCGROUP
+            FLAG_ENTRY(CLONE_NEWCGROUP),
+#endif
+            FLAG_ENTRY(CLONE_NEWUTS),
+            FLAG_ENTRY(CLONE_NEWIPC),
+            FLAG_ENTRY(CLONE_NEWUSER),
+            FLAG_ENTRY(CLONE_NEWPID),
+            FLAG_ENTRY(CLONE_NEWNET),
+            FLAG_ENTRY(CLONE_IO));
+
+    fmt::format_to(buffer, "{}=", parg.name);
+
+    // the low byte in clone flags contains the number of the termination
+    // signal sent to the parent when the child dies, so the whole byte
+    // (0xff) has to be split off, not just bits 0 and 4 (0x11)
+    const long signal_mask = 0xffl;
+
+    format_flag_set(buffer, parg.value & ~signal_mask, flag_names);
+
+    if((parg.value & signal_mask) != 0) {
+        fmt::format_to(buffer, "|");
+        // an empty name makes the signum formatter omit the "name=" prefix
+        format_signum_arg_to(buffer, {"", parg.value & signal_mask});
+    }
+    return;
+}
+
+/**
+ * format_signum_arg_to - format a 'signum' argument
+ *
+ * Format a 'signum' argument (such as those passed to rt_sigaction())
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_signum_arg_to(FmtBuffer& buffer,
+                     const printable_arg& parg) {
+
+    /* Names for signum args */
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(SIGHUP),
+            FLAG_ENTRY(SIGINT),
+            FLAG_ENTRY(SIGQUIT),
+            FLAG_ENTRY(SIGILL),
+            FLAG_ENTRY(SIGTRAP),
+            FLAG_ENTRY(SIGABRT),
+            FLAG_ENTRY(SIGBUS),
+            FLAG_ENTRY(SIGFPE),
+            FLAG_ENTRY(SIGKILL),
+            FLAG_ENTRY(SIGUSR1),
+            FLAG_ENTRY(SIGSEGV),
+            FLAG_ENTRY(SIGUSR2),
+            FLAG_ENTRY(SIGPIPE),
+            FLAG_ENTRY(SIGALRM),
+            FLAG_ENTRY(SIGTERM),
+            FLAG_ENTRY(SIGSTKFLT),
+            FLAG_ENTRY(SIGCHLD),
+            FLAG_ENTRY(SIGCONT),
+            FLAG_ENTRY(SIGSTOP),
+            FLAG_ENTRY(SIGTSTP),
+            FLAG_ENTRY(SIGTTIN),
+            FLAG_ENTRY(SIGTTOU),
+            FLAG_ENTRY(SIGURG),
+            FLAG_ENTRY(SIGXCPU),
+            FLAG_ENTRY(SIGXFSZ),
+            FLAG_ENTRY(SIGVTALRM),
+            FLAG_ENTRY(SIGPROF),
+            FLAG_ENTRY(SIGWINCH),
+            FLAG_ENTRY(SIGIO),
+            FLAG_ENTRY(SIGPWR),
+            FLAG_ENTRY(SIGSYS));
+
+    // only print the "name=" prefix when the argument actually has a name
+    if(std::strcmp(parg.name, "")) {
+        fmt::format_to(buffer, "{}=", parg.name);
+    }
+
+    // a signal number is a single value, so it is matched exactly
+    format_flag(buffer, parg.value, flag_names);
+    return;
+}
+
+
+/**
+ * format_sigproc_how_arg_to - format a 'sigproc how' argument
+ *
+ * Format a 'sigproc how' argument (such as those passed to sigprocmask())
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_sigproc_how_arg_to(FmtBuffer& buffer,
+                          const printable_arg& parg) {
+
+    /* Names for sigproc how args */
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(SIG_BLOCK),
+            FLAG_ENTRY(SIG_UNBLOCK),
+            FLAG_ENTRY(SIG_SETMASK));
+
+    fmt::format_to(buffer, "{}=", parg.name);
+    // 'how' is a single value, so it is matched exactly
+    format_flag(buffer, parg.value, flag_names);
+    return;
+}
+
+/**
+ * format_none_arg_to - format a 'none' argument
+ *
+ * Format a 'none' argument and append the resulting "void" string to the
+ * provided buffer. The argument itself is not inspected.
+ */
+template <typename FmtBuffer>
+inline void
+format_none_arg_to(FmtBuffer& buffer,
+                   const printable_arg& parg) {
+    fmt::format_to(buffer, "void");
+}
+
+
+/**
+ * format_fd_arg_to - format a 'fd' argument
+ *
+ * Format a 'fd' argument (such as those passed to read())
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_fd_arg_to(FmtBuffer& buffer,
+                 const printable_arg& parg) {
+    // NOTE(review): the cast target type was lost from this patch text;
+    // 'int' assumed because file descriptors are ints -- confirm
+    fmt::format_to(buffer, "{}={}", parg.name, static_cast<int>(parg.value));
+}
+
+
+/**
+ * format_atfd_arg_to - format a 'at_fd' argument
+ *
+ * Format a 'at_fd' argument (such as those passed to openat())
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_atfd_arg_to(FmtBuffer& buffer,
+                   const printable_arg& parg) {
+
+    // NOTE(review): the cast target type was lost from this patch text;
+    // 'int' assumed so that the value compares against AT_FDCWD -- confirm
+    if(static_cast<int>(parg.value) == AT_FDCWD) {
+        fmt::format_to(buffer, "{}=AT_FDCWD", parg.name);
+        return;
+    }
+
+    fmt::format_to(buffer, "{}={}", parg.name, static_cast<int>(parg.value));
+}
+
+
+/**
+ * format_cstr_arg_to - format a 'cstr' argument
+ *
+ * Format a 'cstr' argument (i.e. a null-terminated C string)
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_cstr_arg_to(FmtBuffer& buffer,
+                   const printable_arg& parg) {
+
+    if(LIKELY(reinterpret_cast<const char*>(parg.value) != nullptr)) {
+        fmt::format_to(buffer, "{}=\"{}\"", parg.name,
+                       reinterpret_cast<const char*>(parg.value));
+        return;
+    }
+
+    fmt::format_to(buffer, "{}=NULL", parg.name);
+}
+
+/**
+ * format_open_flags_to - format a 'flags' argument
+ *
+ * Format a 'flags' argument (such as those passed to open()) and append
+ * the resulting string to the provided buffer.
+ *
+ * The output is "<name>=<access mode>" followed by "|<flag>" groups: the
+ * access mode is taken from (value & O_ACCMODE), O_TMPFILE (when defined
+ * and fully set) is reported on its own, and whatever bits remain are
+ * handed to format_flag_set() together with the table of extra flags.
+ */
+template <typename FmtBuffer>
+inline void
+format_open_flags_to(FmtBuffer& buffer,
+                     const printable_arg& parg) {
+
+    /* Names for O_ACCMODE args */
+    const auto flag_names =
+        utils::make_array(
+            FLAG_ENTRY(O_RDONLY),
+            FLAG_ENTRY(O_WRONLY),
+            FLAG_ENTRY(O_RDWR));
+
+    const auto extra_flag_names =
+        utils::make_array(
+#ifdef O_EXEC
+            FLAG_ENTRY(O_EXEC),
+#endif
+#ifdef O_SEARCH
+            FLAG_ENTRY(O_SEARCH),
+#endif
+            FLAG_ENTRY(O_APPEND),
+            FLAG_ENTRY(O_CLOEXEC),
+            FLAG_ENTRY(O_CREAT),
+            FLAG_ENTRY(O_DIRECTORY),
+            FLAG_ENTRY(O_DSYNC),
+            FLAG_ENTRY(O_EXCL),
+            FLAG_ENTRY(O_NOCTTY),
+            FLAG_ENTRY(O_NOFOLLOW),
+            FLAG_ENTRY(O_NONBLOCK),
+            FLAG_ENTRY(O_RSYNC),
+            FLAG_ENTRY(O_SYNC),
+            FLAG_ENTRY(O_TRUNC)
+#ifdef O_TTY_INIT
+            , FLAG_ENTRY(O_TTY_INIT)
+#endif
+            );
+
+    long flags = parg.value;
+
+    fmt::format_to(buffer, "{}=", parg.name);
+    format_flag(buffer, flags & O_ACCMODE, flag_names);
+
+    flags &= ~O_ACCMODE;
+
+#ifdef O_TMPFILE
+    // processing it with the other flags can result in
+    // printing O_DIRECTORY when it should not be listed.
+    //
+    // See O_TMPFILE' definition in fcntl-linux.h :
+    // #define __O_TMPFILE   (020000000 | __O_DIRECTORY)
+    //
+    // The name is appended literally: O_TMPFILE is not part of the
+    // access-mode table above, so it cannot be looked up there, and it
+    // needs its own '|' separator after the access mode.
+    if((flags & O_TMPFILE) == O_TMPFILE) {
+        fmt::format_to(buffer, "|O_TMPFILE");
+        flags &= ~O_TMPFILE;
+    }
+#endif // O_TMPFILE
+
+    if(flags != 0) {
+        fmt::format_to(buffer, "|");
+        format_flag_set(buffer, flags, extra_flag_names);
+    }
+}
+
+/**
+ * format_octal_mode_to - format a 'mode' argument
+ *
+ * Format a 'mode' argument (such as those passed to open()) and append the
+ * generated string to the provided buffer. The value is printed in octal
+ * with a leading 0 prefix.
+ */
+template <typename FmtBuffer>
+inline void
+format_octal_mode_to(FmtBuffer& buffer,
+                     const printable_arg& parg) {
+    fmt::format_to(buffer, "{}={:#04o}", parg.name, parg.value);
+}
+
+/**
+ * format_ptr_arg_to - format a 'ptr' argument
+ *
+ * Format a 'ptr' argument (i.e. a C pointer)
+ * and append the resulting string to the provided buffer.
+ * Null pointers are printed as "NULL".
+ */
+template <typename FmtBuffer>
+inline void
+format_ptr_arg_to(FmtBuffer& buffer,
+                  const printable_arg& parg) {
+
+    if(LIKELY(reinterpret_cast<const void*>(parg.value) != nullptr)) {
+        fmt::format_to(buffer, "{}={}", parg.name,
+                       reinterpret_cast<const void*>(parg.value));
+        return;
+    }
+
+    fmt::format_to(buffer, "{}=NULL", parg.name);
+}
+
+
+/**
+ * format_dec_arg_to - format a 'dec' argument
+ *
+ * Format a 'dec' argument (i.e. an integer of unknown size)
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_dec_arg_to(FmtBuffer& buffer,
+                  const printable_arg& parg) {
+    fmt::format_to(buffer, "{}={}", parg.name, parg.value);
+}
+
+
+/**
+ * format_dec32_arg_to - format a 'dec32' argument
+ *
+ * Format a 'dec32' argument (i.e. a 32-bit integer)
+ * and append the resulting string to the provided buffer.
+ */
+template <typename FmtBuffer>
+inline void
+format_dec32_arg_to(FmtBuffer& buffer,
+                    const printable_arg& parg) {
+    /* narrow to int so that only the low 32 bits are interpreted */
+    fmt::format_to(buffer, "{}={}", parg.name, static_cast<int>(parg.value));
+}
+
+
+/**
+ * format_arg_to - format an arbitrary argument
+ *
+ * Format an arbitrary argument and append the resulting
+ * string to the provided buffer. The value is printed in hexadecimal
+ * with a 0x prefix.
+ */
+template <typename FmtBuffer>
+inline void
+format_arg_to(FmtBuffer& buffer,
+              const printable_arg& parg) {
+    fmt::format_to(buffer, "{}={:#x}", parg.name, parg.value);
+}
+
+#undef FLAG_ENTRY
+#undef LIKELY
+#undef UNLIKELY
+
+} // namespace arg
+} // namespace syscall
+} // namespace gkfs
+
+#endif // GKFS_SYSCALLS_ARGS_HPP
diff --git a/include/client/syscalls/decoder.hpp b/include/client/syscalls/decoder.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d7068962d19f6aa2f3848a422eafb334ff75da1b
--- /dev/null
+++ b/include/client/syscalls/decoder.hpp
@@ -0,0 +1,110 @@
+/*
+  Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
+  Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
+
+  This software was partially supported by the
+  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+
+  This software was partially supported by the
+  ADA-FS project under the SPPEXA project funded by the DFG.
+
+  SPDX-License-Identifier: MIT
+*/
+
+#ifndef GKFS_SYSCALLS_DECODER_HPP
+#define GKFS_SYSCALLS_DECODER_HPP
+
+#include
+#include
+#include
+
+namespace gkfs {
+namespace syscall {
+
+namespace detail {
+
+/**
+ * A RAII saver/restorer of errno values: the value passed at construction
+ * is written back to errno when the object goes out of scope, so that the
+ * formatting code in between cannot leak a modified errno to the caller.
+ */
+struct errno_saver {
+    explicit errno_saver(int errnum) :
+        saved_errno_(errnum) { }
+
+    ~errno_saver() {
+        errno = saved_errno_;
+    }
+
+    const int saved_errno_;
+};
+
+/**
+ * Append "<name>(<arg0>, <arg1>, ..." (without the closing parenthesis) for
+ * the given syscall to the buffer and return the descriptor that was used,
+ * so that callers can go on to format the return value.
+ */
+template <typename FmtBuffer>
+inline descriptor
+format_call_to(FmtBuffer& buffer,
+               const long syscall_number,
+               const long argv[MAX_ARGS]) {
+
+    const auto sc = lookup_by_number(syscall_number, argv);
+
+    // args() returns the argument descriptors by value: keep one copy
+    // alive for the whole loop instead of taking references into a
+    // temporary that is destroyed at the end of the statement
+    const auto args = sc.args();
+    const int nargs = sc.num_args();
+
+    fmt::format_to(buffer, "{}(", sc.name());
+
+    for(int i = 0; i < nargs; ++i) {
+        const auto& arg = args.at(i);
+
+        arg.formatter<FmtBuffer>()(buffer, {arg.name(), argv[i]});
+
+        if(i < nargs - 1) {
+            fmt::format_to(buffer, ", ");
+        }
+    }
+
+    return sc;
+}
+
+} // namespace detail
+
+/**
+ * decode - format a syscall whose result is not (yet) known
+ *
+ * Appends "<name>(<args>) = ?" to the buffer. errno is restored to its
+ * value on entry before returning.
+ */
+template <typename FmtBuffer>
+inline void
+decode(FmtBuffer& buffer,
+       const long syscall_number,
+       const long argv[MAX_ARGS]) {
+
+    detail::errno_saver _(errno);
+
+    detail::format_call_to(buffer, syscall_number, argv);
+
+    fmt::format_to(buffer, ") = ?");
+}
+
+/**
+ * decode - format a syscall together with its result
+ *
+ * Appends "<name>(<args>) = <result>" to the buffer, where <result> is:
+ *   - "?" if never_returns() reports that the syscall does not return,
+ *   - "-1 <ERRNO NAME> (<message>)" if error_code() classifies the result
+ *     as an error,
+ *   - the value rendered by the formatter of the syscall's return type
+ *     otherwise.
+ * errno is restored to its value on entry before returning.
+ */
+template <typename FmtBuffer>
+inline void
+decode(FmtBuffer& buffer,
+       const long syscall_number,
+       const long argv[MAX_ARGS],
+       const long result) {
+
+    detail::errno_saver _(errno);
+
+    const auto sc = detail::format_call_to(buffer, syscall_number, argv);
+
+    if(never_returns(syscall_number)) {
+        fmt::format_to(buffer, ") = ?");
+        return;
+    }
+
+    const int err = error_code(result);
+
+    if(err != 0) {
+        fmt::format_to(buffer, ") = {} {} ({})",
+                       static_cast<int>(-1),
+                       errno_name(err),
+                       errno_message(err));
+        return;
+    }
+
+    fmt::format_to(buffer, ") = ");
+    const auto ret = sc.return_type();
+    ret.formatter<FmtBuffer>()(buffer, result);
+}
+
+} // namespace syscall
+} // namespace gkfs
+
+#endif // GKFS_SYSCALLS_DECODER_HPP
diff --git a/include/client/syscalls/detail/syscall_info.h b/include/client/syscalls/detail/syscall_info.h
new file mode 100644
index
0000000000000000000000000000000000000000..327cd9dbcf89d53bd2ccd43003591f0f20fd6b2c
--- /dev/null
+++ b/include/client/syscalls/detail/syscall_info.h
@@ -0,0 +1,85 @@
+/*
+  Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
+  Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
+
+  This software was partially supported by the
+  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+
+  This software was partially supported by the
+  ADA-FS project under the SPPEXA project funded by the DFG.
+
+  SPDX-License-Identifier: MIT
+*/
+
+#ifndef SYSCALLS_DETAIL_SYSCALL_INFO_H
+#define SYSCALLS_DETAIL_SYSCALL_INFO_H
+
+/* 'bool' is used in the declarations below; C translation units need
+ * <stdbool.h> for it (it is harmless when included from C++) */
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/* number of argument slots stored in each syscall_info entry */
+#define MAX_SYSCALL_ARGS 6u
+
+/* kinds of syscall arguments; each kind selects how a value is printed */
+typedef enum {
+    none = 0,       /* no argument */
+    fd,             /* fd argument for non *at() syscalls */
+    atfd,           /* fd argument for *at() syscalls */
+    cstr,           /* a \0-terminated C string */
+    open_flags,     /* flags for open/create/mq_open ... */
+    octal_mode,     /* mode_t arguments */
+    ptr,            /* pointer arguments */
+    dec,            /* signed decimal number (aka. long) */
+    dec32,          /* signed 32-bit decimal number (aka. int) */
+    offset,         /* off_t arguments */
+    whence,         /* 'whence' argument in lseek-style syscalls */
+    mmap_prot,      /* protections for the mmap() family of syscalls */
+    mmap_flags,     /* flags for the mmap() family of syscalls */
+    clone_flags,    /* flags for the clone() syscall */
+    signum,         /* signal numbers */
+    sigproc_how,    /* sigprocmask argument */
+    arg,            /* generic argument, no special formatting */
+    arg_type_max    /* number of argument kinds (not a kind itself) */
+} arg_type_t;
+
+/* description of a single syscall argument */
+typedef struct {
+    const arg_type_t a_type;    /* kind of the argument */
+    const char * const a_name;  /* name of the argument */
+} syscall_arg_t;
+
+/* kinds of syscall return values */
+typedef enum {
+    rnone,          /* no return value */
+    rptr,           /* pointer return value */
+    rdec,           /* decimal return value */
+    ret_type_max,   /* number of return kinds (not a kind itself) */
+} ret_type_t;
+
+/* description of a syscall's return value */
+typedef struct {
+    const ret_type_t r_type;    /* kind of the return value */
+} syscall_ret_t;
+
+/* static description of a syscall */
+struct syscall_info {
+    const long s_nr;                    /* syscall number */
+    const char * const s_name;          /* syscall name */
+    const int s_nargs;                  /* number of arguments in use */
+    const syscall_ret_t s_return_type;  /* return value description */
+    const syscall_arg_t s_args[MAX_SYSCALL_ARGS]; /* argument descriptions */
+};
+
+extern const struct syscall_info syscall_table[];
+
+/* look up a syscall description by number; argv may be passed as NULL
+ * by callers that have no arguments at hand */
+extern const struct syscall_info*
+get_syscall_info(const long syscall_number,
+                 const long* argv);
+
+/* look up a syscall description by name */
+extern const struct syscall_info*
+get_syscall_info_by_name(const char* syscall_name);
+
+extern bool
+syscall_never_returns(long);
+
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // SYSCALLS_DETAIL_SYSCALL_INFO_H
diff --git a/include/client/syscalls/errno.hpp b/include/client/syscalls/errno.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f1131866bbaa074bb138d872bcf37aa8a7a732aa
--- /dev/null
+++ b/include/client/syscalls/errno.hpp
@@ -0,0 +1,581 @@
+/*
+  Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
+  Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
+
+  This software was partially supported by the
+  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+
+  This software was partially supported by the
+  ADA-FS project under the SPPEXA project funded by the DFG.
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALLS_ERRNO_HPP +#define GKFS_SYSCALLS_ERRNO_HPP + +#include +#include + +namespace gkfs { +namespace syscall { + +static const std::array errno_names = { + /* [ 0] = */ NULL, + /* [ 1] = */ "EPERM", + /* [ 2] = */ "ENOENT", + /* [ 3] = */ "ESRCH", + /* [ 4] = */ "EINTR", + /* [ 5] = */ "EIO", + /* [ 6] = */ "ENXIO", + /* [ 7] = */ "E2BIG", + /* [ 8] = */ "ENOEXEC", + /* [ 9] = */ "EBADF", + /* [ 10] = */ "ECHILD", + /* [ 11] = */ "EAGAIN", + /* [ 12] = */ "ENOMEM", + /* [ 13] = */ "EACCES", + /* [ 14] = */ "EFAULT", + /* [ 15] = */ "ENOTBLK", + /* [ 16] = */ "EBUSY", + /* [ 17] = */ "EEXIST", + /* [ 18] = */ "EXDEV", + /* [ 19] = */ "ENODEV", + /* [ 20] = */ "ENOTDIR", + /* [ 21] = */ "EISDIR", + /* [ 22] = */ "EINVAL", + /* [ 23] = */ "ENFILE", + /* [ 24] = */ "EMFILE", + /* [ 25] = */ "ENOTTY", + /* [ 26] = */ "ETXTBSY", + /* [ 27] = */ "EFBIG", + /* [ 28] = */ "ENOSPC", + /* [ 29] = */ "ESPIPE", + /* [ 30] = */ "EROFS", + /* [ 31] = */ "EMLINK", + /* [ 32] = */ "EPIPE", + /* [ 33] = */ "EDOM", + /* [ 34] = */ "ERANGE", + /* [ 35] = */ "EDEADLK", + /* [ 36] = */ "ENAMETOOLONG", + /* [ 37] = */ "ENOLCK", + /* [ 38] = */ "ENOSYS", + /* [ 39] = */ "ENOTEMPTY", + /* [ 40] = */ "ELOOP", + /* [ 41] = */ NULL, + /* [ 42] = */ "ENOMSG", + /* [ 43] = */ "EIDRM", + /* [ 44] = */ "ECHRNG", + /* [ 45] = */ "EL2NSYNC", + /* [ 46] = */ "EL3HLT", + /* [ 47] = */ "EL3RST", + /* [ 48] = */ "ELNRNG", + /* [ 49] = */ "EUNATCH", + /* [ 50] = */ "ENOCSI", + /* [ 51] = */ "EL2HLT", + /* [ 52] = */ "EBADE", + /* [ 53] = */ "EBADR", + /* [ 54] = */ "EXFULL", + /* [ 55] = */ "ENOANO", + /* [ 56] = */ "EBADRQC", + /* [ 57] = */ "EBADSLT", + /* [ 58] = */ NULL, + /* [ 59] = */ "EBFONT", + /* [ 60] = */ "ENOSTR", + /* [ 61] = */ "ENODATA", + /* [ 62] = */ "ETIME", + /* [ 63] = */ "ENOSR", + /* [ 64] = */ "ENONET", + /* [ 65] = */ "ENOPKG", + /* [ 66] = */ "EREMOTE", + /* [ 67] = */ "ENOLINK", + /* [ 68] = */ "EADV", + /* [ 69] = */ 
"ESRMNT", + /* [ 70] = */ "ECOMM", + /* [ 71] = */ "EPROTO", + /* [ 72] = */ "EMULTIHOP", + /* [ 73] = */ "EDOTDOT", + /* [ 74] = */ "EBADMSG", + /* [ 75] = */ "EOVERFLOW", + /* [ 76] = */ "ENOTUNIQ", + /* [ 77] = */ "EBADFD", + /* [ 78] = */ "EREMCHG", + /* [ 79] = */ "ELIBACC", + /* [ 80] = */ "ELIBBAD", + /* [ 81] = */ "ELIBSCN", + /* [ 82] = */ "ELIBMAX", + /* [ 83] = */ "ELIBEXEC", + /* [ 84] = */ "EILSEQ", + /* [ 85] = */ "ERESTART", + /* [ 86] = */ "ESTRPIPE", + /* [ 87] = */ "EUSERS", + /* [ 88] = */ "ENOTSOCK", + /* [ 89] = */ "EDESTADDRREQ", + /* [ 90] = */ "EMSGSIZE", + /* [ 91] = */ "EPROTOTYPE", + /* [ 92] = */ "ENOPROTOOPT", + /* [ 93] = */ "EPROTONOSUPPORT", + /* [ 94] = */ "ESOCKTNOSUPPORT", + /* [ 95] = */ "EOPNOTSUPP", + /* [ 96] = */ "EPFNOSUPPORT", + /* [ 97] = */ "EAFNOSUPPORT", + /* [ 98] = */ "EADDRINUSE", + /* [ 99] = */ "EADDRNOTAVAIL", + /* [100] = */ "ENETDOWN", + /* [101] = */ "ENETUNREACH", + /* [102] = */ "ENETRESET", + /* [103] = */ "ECONNABORTED", + /* [104] = */ "ECONNRESET", + /* [105] = */ "ENOBUFS", + /* [106] = */ "EISCONN", + /* [107] = */ "ENOTCONN", + /* [108] = */ "ESHUTDOWN", + /* [109] = */ "ETOOMANYREFS", + /* [110] = */ "ETIMEDOUT", + /* [111] = */ "ECONNREFUSED", + /* [112] = */ "EHOSTDOWN", + /* [113] = */ "EHOSTUNREACH", + /* [114] = */ "EALREADY", + /* [115] = */ "EINPROGRESS", + /* [116] = */ "ESTALE", + /* [117] = */ "EUCLEAN", + /* [118] = */ "ENOTNAM", + /* [119] = */ "ENAVAIL", + /* [120] = */ "EISNAM", + /* [121] = */ "EREMOTEIO", + /* [122] = */ "EDQUOT", + /* [123] = */ "ENOMEDIUM", + /* [124] = */ "EMEDIUMTYPE", + /* [125] = */ "ECANCELED", + /* [126] = */ "ENOKEY", + /* [127] = */ "EKEYEXPIRED", + /* [128] = */ "EKEYREVOKED", + /* [129] = */ "EKEYREJECTED", + /* [130] = */ "EOWNERDEAD", + /* [131] = */ "ENOTRECOVERABLE", + /* [132] = */ "ERFKILL", + /* [133] = */ "EHWPOISON", + /* [134] = */ NULL, + /* [135] = */ NULL, + /* [136] = */ NULL, + /* [137] = */ NULL, + /* [138] = */ NULL, + /* [139] = */ NULL, + 
/* [140] = */ NULL, + /* [141] = */ NULL, + /* [142] = */ NULL, + /* [143] = */ NULL, + /* [144] = */ NULL, + /* [145] = */ NULL, + /* [146] = */ NULL, + /* [147] = */ NULL, + /* [148] = */ NULL, + /* [149] = */ NULL, + /* [150] = */ NULL, + /* [151] = */ NULL, + /* [152] = */ NULL, + /* [153] = */ NULL, + /* [154] = */ NULL, + /* [155] = */ NULL, + /* [156] = */ NULL, + /* [157] = */ NULL, + /* [158] = */ NULL, + /* [159] = */ NULL, + /* [160] = */ NULL, + /* [161] = */ NULL, + /* [162] = */ NULL, + /* [163] = */ NULL, + /* [164] = */ NULL, + /* [165] = */ NULL, + /* [166] = */ NULL, + /* [167] = */ NULL, + /* [168] = */ NULL, + /* [169] = */ NULL, + /* [170] = */ NULL, + /* [171] = */ NULL, + /* [172] = */ NULL, + /* [173] = */ NULL, + /* [174] = */ NULL, + /* [175] = */ NULL, + /* [176] = */ NULL, + /* [177] = */ NULL, + /* [178] = */ NULL, + /* [179] = */ NULL, + /* [180] = */ NULL, + /* [181] = */ NULL, + /* [182] = */ NULL, + /* [183] = */ NULL, + /* [184] = */ NULL, + /* [185] = */ NULL, + /* [186] = */ NULL, + /* [187] = */ NULL, + /* [188] = */ NULL, + /* [189] = */ NULL, + /* [190] = */ NULL, + /* [191] = */ NULL, + /* [192] = */ NULL, + /* [193] = */ NULL, + /* [194] = */ NULL, + /* [195] = */ NULL, + /* [196] = */ NULL, + /* [197] = */ NULL, + /* [198] = */ NULL, + /* [199] = */ NULL, + /* [200] = */ NULL, + /* [201] = */ NULL, + /* [202] = */ NULL, + /* [203] = */ NULL, + /* [204] = */ NULL, + /* [205] = */ NULL, + /* [206] = */ NULL, + /* [207] = */ NULL, + /* [208] = */ NULL, + /* [209] = */ NULL, + /* [210] = */ NULL, + /* [211] = */ NULL, + /* [212] = */ NULL, + /* [213] = */ NULL, + /* [214] = */ NULL, + /* [215] = */ NULL, + /* [216] = */ NULL, + /* [217] = */ NULL, + /* [218] = */ NULL, + /* [219] = */ NULL, + /* [220] = */ NULL, + /* [221] = */ NULL, + /* [222] = */ NULL, + /* [223] = */ NULL, + /* [224] = */ NULL, + /* [225] = */ NULL, + /* [226] = */ NULL, + /* [227] = */ NULL, + /* [228] = */ NULL, + /* [229] = */ NULL, + /* [230] = */ NULL, 
+ /* [231] = */ NULL, + /* [232] = */ NULL, + /* [233] = */ NULL, + /* [234] = */ NULL, + /* [235] = */ NULL, + /* [236] = */ NULL, + /* [237] = */ NULL, + /* [238] = */ NULL, + /* [239] = */ NULL, + /* [240] = */ NULL, + /* [241] = */ NULL, + /* [242] = */ NULL, + /* [243] = */ NULL, + /* [244] = */ NULL, + /* [245] = */ NULL, + /* [246] = */ NULL, + /* [247] = */ NULL, + /* [248] = */ NULL, + /* [249] = */ NULL, + /* [250] = */ NULL, + /* [251] = */ NULL, + /* [252] = */ NULL, + /* [253] = */ NULL, + /* [254] = */ NULL, + /* [255] = */ NULL, + /* [256] = */ NULL, + /* [257] = */ NULL, + /* [258] = */ NULL, + /* [259] = */ NULL, + /* [260] = */ NULL, + /* [261] = */ NULL, + /* [262] = */ NULL, + /* [263] = */ NULL, + /* [264] = */ NULL, + /* [265] = */ NULL, + /* [266] = */ NULL, + /* [267] = */ NULL, + /* [268] = */ NULL, + /* [269] = */ NULL, + /* [270] = */ NULL, + /* [271] = */ NULL, + /* [272] = */ NULL, + /* [273] = */ NULL, + /* [274] = */ NULL, + /* [275] = */ NULL, + /* [276] = */ NULL, + /* [277] = */ NULL, + /* [278] = */ NULL, + /* [279] = */ NULL, + /* [280] = */ NULL, + /* [281] = */ NULL, + /* [282] = */ NULL, + /* [283] = */ NULL, + /* [284] = */ NULL, + /* [285] = */ NULL, + /* [286] = */ NULL, + /* [287] = */ NULL, + /* [288] = */ NULL, + /* [289] = */ NULL, + /* [290] = */ NULL, + /* [291] = */ NULL, + /* [292] = */ NULL, + /* [293] = */ NULL, + /* [294] = */ NULL, + /* [295] = */ NULL, + /* [296] = */ NULL, + /* [297] = */ NULL, + /* [298] = */ NULL, + /* [299] = */ NULL, + /* [300] = */ NULL, + /* [301] = */ NULL, + /* [302] = */ NULL, + /* [303] = */ NULL, + /* [304] = */ NULL, + /* [305] = */ NULL, + /* [306] = */ NULL, + /* [307] = */ NULL, + /* [308] = */ NULL, + /* [309] = */ NULL, + /* [310] = */ NULL, + /* [311] = */ NULL, + /* [312] = */ NULL, + /* [313] = */ NULL, + /* [314] = */ NULL, + /* [315] = */ NULL, + /* [316] = */ NULL, + /* [317] = */ NULL, + /* [318] = */ NULL, + /* [319] = */ NULL, + /* [320] = */ NULL, + /* [321] = */ 
NULL, + /* [322] = */ NULL, + /* [323] = */ NULL, + /* [324] = */ NULL, + /* [325] = */ NULL, + /* [326] = */ NULL, + /* [327] = */ NULL, + /* [328] = */ NULL, + /* [329] = */ NULL, + /* [330] = */ NULL, + /* [331] = */ NULL, + /* [332] = */ NULL, + /* [333] = */ NULL, + /* [334] = */ NULL, + /* [335] = */ NULL, + /* [336] = */ NULL, + /* [337] = */ NULL, + /* [338] = */ NULL, + /* [339] = */ NULL, + /* [340] = */ NULL, + /* [341] = */ NULL, + /* [342] = */ NULL, + /* [343] = */ NULL, + /* [344] = */ NULL, + /* [345] = */ NULL, + /* [346] = */ NULL, + /* [347] = */ NULL, + /* [348] = */ NULL, + /* [349] = */ NULL, + /* [350] = */ NULL, + /* [351] = */ NULL, + /* [352] = */ NULL, + /* [353] = */ NULL, + /* [354] = */ NULL, + /* [355] = */ NULL, + /* [356] = */ NULL, + /* [357] = */ NULL, + /* [358] = */ NULL, + /* [359] = */ NULL, + /* [360] = */ NULL, + /* [361] = */ NULL, + /* [362] = */ NULL, + /* [363] = */ NULL, + /* [364] = */ NULL, + /* [365] = */ NULL, + /* [366] = */ NULL, + /* [367] = */ NULL, + /* [368] = */ NULL, + /* [369] = */ NULL, + /* [370] = */ NULL, + /* [371] = */ NULL, + /* [372] = */ NULL, + /* [373] = */ NULL, + /* [374] = */ NULL, + /* [375] = */ NULL, + /* [376] = */ NULL, + /* [377] = */ NULL, + /* [378] = */ NULL, + /* [379] = */ NULL, + /* [380] = */ NULL, + /* [381] = */ NULL, + /* [382] = */ NULL, + /* [383] = */ NULL, + /* [384] = */ NULL, + /* [385] = */ NULL, + /* [386] = */ NULL, + /* [387] = */ NULL, + /* [388] = */ NULL, + /* [389] = */ NULL, + /* [390] = */ NULL, + /* [391] = */ NULL, + /* [392] = */ NULL, + /* [393] = */ NULL, + /* [394] = */ NULL, + /* [395] = */ NULL, + /* [396] = */ NULL, + /* [397] = */ NULL, + /* [398] = */ NULL, + /* [399] = */ NULL, + /* [400] = */ NULL, + /* [401] = */ NULL, + /* [402] = */ NULL, + /* [403] = */ NULL, + /* [404] = */ NULL, + /* [405] = */ NULL, + /* [406] = */ NULL, + /* [407] = */ NULL, + /* [408] = */ NULL, + /* [409] = */ NULL, + /* [410] = */ NULL, + /* [411] = */ NULL, + /* [412] = 
*/ NULL, + /* [413] = */ NULL, + /* [414] = */ NULL, + /* [415] = */ NULL, + /* [416] = */ NULL, + /* [417] = */ NULL, + /* [418] = */ NULL, + /* [419] = */ NULL, + /* [420] = */ NULL, + /* [421] = */ NULL, + /* [422] = */ NULL, + /* [423] = */ NULL, + /* [424] = */ NULL, + /* [425] = */ NULL, + /* [426] = */ NULL, + /* [427] = */ NULL, + /* [428] = */ NULL, + /* [429] = */ NULL, + /* [430] = */ NULL, + /* [431] = */ NULL, + /* [432] = */ NULL, + /* [433] = */ NULL, + /* [434] = */ NULL, + /* [435] = */ NULL, + /* [436] = */ NULL, + /* [437] = */ NULL, + /* [438] = */ NULL, + /* [439] = */ NULL, + /* [440] = */ NULL, + /* [441] = */ NULL, + /* [442] = */ NULL, + /* [443] = */ NULL, + /* [444] = */ NULL, + /* [445] = */ NULL, + /* [446] = */ NULL, + /* [447] = */ NULL, + /* [448] = */ NULL, + /* [449] = */ NULL, + /* [450] = */ NULL, + /* [451] = */ NULL, + /* [452] = */ NULL, + /* [453] = */ NULL, + /* [454] = */ NULL, + /* [455] = */ NULL, + /* [456] = */ NULL, + /* [457] = */ NULL, + /* [458] = */ NULL, + /* [459] = */ NULL, + /* [460] = */ NULL, + /* [461] = */ NULL, + /* [462] = */ NULL, + /* [463] = */ NULL, + /* [464] = */ NULL, + /* [465] = */ NULL, + /* [466] = */ NULL, + /* [467] = */ NULL, + /* [468] = */ NULL, + /* [469] = */ NULL, + /* [470] = */ NULL, + /* [471] = */ NULL, + /* [472] = */ NULL, + /* [473] = */ NULL, + /* [474] = */ NULL, + /* [475] = */ NULL, + /* [476] = */ NULL, + /* [477] = */ NULL, + /* [478] = */ NULL, + /* [479] = */ NULL, + /* [480] = */ NULL, + /* [481] = */ NULL, + /* [482] = */ NULL, + /* [483] = */ NULL, + /* [484] = */ NULL, + /* [485] = */ NULL, + /* [486] = */ NULL, + /* [487] = */ NULL, + /* [488] = */ NULL, + /* [489] = */ NULL, + /* [490] = */ NULL, + /* [491] = */ NULL, + /* [492] = */ NULL, + /* [493] = */ NULL, + /* [494] = */ NULL, + /* [495] = */ NULL, + /* [496] = */ NULL, + /* [497] = */ NULL, + /* [498] = */ NULL, + /* [499] = */ NULL, + /* [500] = */ NULL, + /* [501] = */ NULL, + /* [502] = */ NULL, + /* [503] 
= */ NULL,
+    /* [504] = */ NULL,
+    /* [505] = */ NULL,
+    /* [506] = */ NULL,
+    /* [507] = */ NULL,
+    /* [508] = */ NULL,
+    /* [509] = */ NULL,
+    /* [510] = */ NULL,
+    /* [511] = */ NULL,
+    /* [512] = */ "ERESTARTSYS",
+    /* [513] = */ "ERESTARTNOINTR",
+    /* [514] = */ "ERESTARTNOHAND",
+    /* [515] = */ "ENOIOCTLCMD",
+    /* [516] = */ "ERESTART_RESTARTBLOCK",
+    /* [517] = */ "EPROBE_DEFER",
+    /* [518] = */ "EOPENSTALE",
+    /* [519] = */ NULL,
+    /* [520] = */ NULL,
+    /* [521] = */ "EBADHANDLE",
+    /* [522] = */ "ENOTSYNC",
+    /* [523] = */ "EBADCOOKIE",
+    /* [524] = */ "ENOTSUPP",
+    /* [525] = */ "ETOOSMALL",
+    /* [526] = */ "ESERVERFAULT",
+    /* [527] = */ "EBADTYPE",
+    /* [528] = */ "EJUKEBOX",
+    /* [529] = */ "EIOCBQUEUED",
+    /* [530] = */ "ERECALLCONFLICT",
+};
+
+/**
+ * errno_name - return the symbolic name of an errno value
+ *
+ * Returns the entry of errno_names for errno_value, or "EUNKNOWN" when the
+ * value has no name in the table. Values that fall outside the table
+ * (negative or past its last entry) are also reported as "EUNKNOWN"
+ * instead of raising std::out_of_range.
+ */
+static inline std::string
+errno_name(int errno_value) {
+
+    if(errno_value < 0 ||
+       static_cast<std::size_t>(errno_value) >= errno_names.size()) {
+        return "EUNKNOWN";
+    }
+
+    const auto name = errno_names[static_cast<std::size_t>(errno_value)];
+
+    if(!name) {
+        return "EUNKNOWN";
+    }
+
+    return name;
+}
+
+/**
+ * errno_message - return the textual description of an errno value
+ *
+ * The description is obtained from strerror_r() using a local buffer.
+ * NOTE(review): the char* return value is used directly, i.e. this relies
+ * on the GNU flavour of strerror_r() -- confirm for non-glibc targets.
+ */
+static inline std::string
+errno_message(int errno_value) {
+    // 1024 should be more than enough for most locales
+    constexpr const std::size_t MAX_ERROR_MSG = 0x400;
+    std::array<char, MAX_ERROR_MSG> errstr;
+    char* msg = ::strerror_r(errno_value, errstr.data(), MAX_ERROR_MSG);
+    return std::string{msg};
+}
+
+} // namespace syscall
+} // namespace gkfs
+
+#endif // GKFS_SYSCALLS_ERRNO_HPP
diff --git a/include/client/syscalls/rets.hpp b/include/client/syscalls/rets.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bbde7d681d816f0fb8631bdaa1b51661d9c1b504
--- /dev/null
+++ b/include/client/syscalls/rets.hpp
@@ -0,0 +1,136 @@
+/*
+  Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
+  Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
+
+  This software was partially supported by the
+  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+
+  This software was partially supported by the
+  ADA-FS project under the SPPEXA project funded by the DFG.
+
+  SPDX-License-Identifier: MIT
+*/
+
+#ifndef GKFS_SYSCALLS_RETS_HPP
+#define GKFS_SYSCALLS_RETS_HPP
+
+#include
+#include
+#include
+
+namespace gkfs {
+namespace syscall {
+namespace ret {
+
+/** Allowed ret types (based on the values of the corresponding C enum) */
+enum class type {
+    none = ::ret_type_t::rnone,
+    ptr  = ::ret_type_t::rptr,
+    dec  = ::ret_type_t::rdec,
+};
+
+/* Some constant definitions for convenience */
+static constexpr auto none = type::none;
+static constexpr auto ptr  = type::ptr;
+static constexpr auto dec  = type::dec;
+
+
+/** All ret formatters must follow this prototype */
+template <typename FmtBuffer>
+using formatter =
+    std::add_pointer_t<void(FmtBuffer&, long)>;
+
+
+/** forward declare formatters */
+template <typename FmtBuffer> inline void
+format_none_ret_to(FmtBuffer& buffer, long val);
+
+template <typename FmtBuffer> inline void
+format_ptr_ret_to(FmtBuffer& buffer, long val);
+
+template <typename FmtBuffer> inline void
+format_dec_ret_to(FmtBuffer& buffer, long val);
+
+/** Known formatters, indexed by the value of the C ret_type_t enum */
+template <typename FmtBuffer>
+static const constexpr
+std::array<formatter<FmtBuffer>, ret_type_max> formatters = {
+    /* [rnone] = */ format_none_ret_to,
+    /* [rptr]  = */ format_ptr_ret_to,
+    /* [rdec]  = */ format_dec_ret_to,
+};
+
+/** A return value descriptor */
+struct desc {
+    ret::type type_;
+
+    /** kind of the described return value */
+    ret::type
+    type() const {
+        return type_;
+    }
+
+    bool
+    operator==(ret::type t) const {
+        return type_ == t;
+    }
+
+    bool
+    operator!=(ret::type t) const {
+        return type_ != t;
+    }
+
+    /**
+     * Return the function that formats values of this kind into a
+     * FmtBuffer. Kinds that do not index into the table of known
+     * formatters are printed with the decimal formatter.
+     */
+    template <typename FmtBuffer>
+    formatter<FmtBuffer>
+    formatter() const {
+        const auto idx = static_cast<int>(type_);
+
+        // if the type is unknown fall back to the default formatter
+        if(idx < 0 || idx >= static_cast<int>(formatters<FmtBuffer>.size())) {
+            return format_dec_ret_to;
+        }
+
+        assert(formatters<FmtBuffer>.at(idx) != nullptr);
+
+        return formatters<FmtBuffer>.at(idx);
+    }
+};
+
+
+/** Specific formatter implementations follow */
+#define LIKELY(x)       __builtin_expect(!!(x), 1)
+#define UNLIKELY(x)     __builtin_expect(!!(x), 0)
+
+/** append "void" to the buffer; the value itself is ignored */
+template <typename FmtBuffer>
+inline void
+format_none_ret_to(FmtBuffer& buffer,
+                   long val) {
+    fmt::format_to(buffer, "void");
+}
+
+/** append the value as a pointer, or "NULL" when it is zero */
+template <typename FmtBuffer>
+inline void
+format_ptr_ret_to(FmtBuffer& buffer,
+                  long val) {
+    if(LIKELY(reinterpret_cast<const void*>(val) != nullptr)) {
+        fmt::format_to(buffer, "{}", reinterpret_cast<const void*>(val));
+        return;
+    }
+
+    fmt::format_to(buffer, "NULL");
+}
+
+/** append the value as a signed decimal number */
+template <typename FmtBuffer>
+inline void
+format_dec_ret_to(FmtBuffer& buffer,
+                  long val) {
+    fmt::format_to(buffer, "{}", val);
+}
+
+#undef LIKELY
+#undef UNLIKELY
+
+} // namespace ret
+} // namespace syscall
+} // namespace gkfs
+
+#endif // GKFS_SYSCALLS_RETS_HPP
diff --git a/include/client/syscalls/syscall.hpp b/include/client/syscalls/syscall.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8bd0b1a6676f197285e84223a6c3895d22919246
--- /dev/null
+++ b/include/client/syscalls/syscall.hpp
@@ -0,0 +1,216 @@
+/*
+  Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
+  Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
+
+  This software was partially supported by the
+  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+
+  This software was partially supported by the
+  ADA-FS project under the SPPEXA project funded by the DFG.
+
+  SPDX-License-Identifier: MIT
+*/
+
+#ifndef GKFS_SYSCALL_HPP
+#define GKFS_SYSCALL_HPP
+
+#include
+#include
+#include
+#include
+
+namespace gkfs {
+namespace syscall {
+
+static const auto constexpr MAX_ARGS = 6u;
+using arg_list = std::array<arg::desc, MAX_ARGS>;
+
+/**
+ * C++ view over the C 'struct syscall_info': it adds accessors but no data
+ * members, and the lookup functions below obtain it by casting the pointer
+ * returned by the C lookup routines.
+ */
+struct descriptor : private ::syscall_info {
+
+    /** syscall number */
+    long
+    number() const {
+        return s_nr;
+    }
+
+    /** syscall name */
+    const char*
+    name() const {
+        return s_name;
+    }
+
+    /** number of arguments taken by the syscall */
+    int
+    num_args() const {
+        return s_nargs;
+    }
+
+    /**
+     * Descriptors for all MAX_ARGS argument slots. The list is built and
+     * returned by value on every call, so callers should store the result
+     * rather than keep references into the returned temporary.
+     */
+    arg_list
+    args() const {
+
+        arg_list args;
+
+        for(auto i = 0u; i < MAX_ARGS; ++i) {
+            args[i] = {static_cast<arg::type>(s_args[i].a_type),
+                       s_args[i].a_name};
+        }
+
+        return args;
+    }
+
+    /** descriptor of the syscall's return value */
+    ret::desc
+    return_type() const {
+        return ret::desc{static_cast<ret::type>(s_return_type.r_type)};
+    }
+};
+
+/** look up a syscall by number, without providing its arguments */
+static inline descriptor
+lookup_by_number(const long syscall_number) {
+    const auto* info = ::get_syscall_info(syscall_number, nullptr);
+    return *reinterpret_cast<const descriptor*>(info);
+}
+
+/** look up a syscall by number, providing its arguments */
+static inline descriptor
+lookup_by_number(const long syscall_number,
+                 const long argv[MAX_ARGS]) {
+    const auto* info = ::get_syscall_info(syscall_number, argv);
+    return *reinterpret_cast<const descriptor*>(info);
+}
+
+/** look up a syscall by name */
+static inline descriptor
+lookup_by_name(const std::string syscall_name) {
+    const auto* info = ::get_syscall_info_by_name(syscall_name.c_str());
+    return *reinterpret_cast<const descriptor*>(info);
+}
+
+/** true if the syscall's return type is described as 'none' */
+static inline bool
+never_returns(const long syscall_number) {
+    const auto desc = lookup_by_number(syscall_number);
+    return desc.return_type() == ret::none;
+}
+
+static inline bool
+always_returns(const long syscall_number) {
+    return !never_returns(syscall_number);
+}
+
+/** true for execve (and execveat, where available) */
+static inline bool
+may_not_return(const long syscall_number) {
+    return syscall_number == SYS_execve
+#ifdef SYS_execveat
+        || syscall_number == SYS_execveat
+#endif
+        ;
+}
+
+
+// information about a syscall
+enum class info : int {
+    unknown        = 0x00000000, // no info (reset)
+
+    // syscall origin
+    internal       = 0x00000001, // syscall originates from GekkoFS' internals
+    external       = 0x00000002, // syscall originates from client application
+
+    // syscall target
+    kernel         = 0x00000010, // syscall forwarded to the kernel
+    hook           = 0x00000020, // syscall handled by GekkoFS
+
+    // syscall state
+    executed       = 0x00000100, // syscall has been executed
+    not_executed   = 0x00000000, // syscall has not been executed
+
+    // masks
+    origin_mask    = 0x00000003, // mask for syscall's origin information
+    target_mask    = 0x7ffffefc, // mask for syscall's target information
+    execution_mask = 0x00000100  // mask for syscall's execution state
+};
+
+
+/* bitwise operators so that 'info' values can be combined as flags */
+inline constexpr info
+operator&(info t1, info t2) {
+    return info(static_cast<int>(t1) & static_cast<int>(t2));
+}
+
+inline constexpr info
+operator|(info t1, info t2) {
+    return info(static_cast<int>(t1) | static_cast<int>(t2));
+}
+
+inline constexpr info
+operator^(info t1, info t2) {
+    return info(static_cast<int>(t1) ^ static_cast<int>(t2));
+}
+
+inline constexpr info
+operator~(info t1) {
+    return info(~static_cast<int>(t1));
+}
+
+inline const info&
+operator|=(info& t1, info t2) {
+    return t1 = t1 | t2;
+}
+
+inline const info&
+operator&=(info& t1, info t2) {
+    return t1 = t1 & t2;
+}
+
+inline const info&
+operator^=(info& t1, info t2) {
+    return t1 = t1 ^ t2;
+}
+
+
+static const auto constexpr no_info = info::unknown;
+static const auto constexpr from_internal_code = info::internal;
+static const auto constexpr from_external_code = info::external;
+static const auto constexpr to_kernel = info::kernel;
+static const auto constexpr to_hook = info::hook;
+
+static const auto constexpr executed = info::executed;
+static const auto constexpr not_executed = info::not_executed;
+
+static const auto constexpr origin_mask = info::origin_mask;
+static const auto constexpr target_mask = info::target_mask;
+static const auto constexpr execution_mask = info::execution_mask;
+
+enum {
+    hooked = 0x0,
+    forward_to_kernel = 0x1
+};
+
+/** extract the origin bits of a syscall's info */
+static constexpr auto
+origin(syscall::info info) {
+    return info & origin_mask;
+}
+
+/** extract the target bits of a syscall's info */
+static constexpr auto
+target(syscall::info info) {
+    return info & target_mask;
+}
+
+static constexpr bool
+is_handled_by_kernel(syscall::info info) {
+    return (info & target_mask) == to_kernel;
+}
+
+static constexpr auto
+execution_is_pending(syscall::info info) {
+    return (info & execution_mask) == not_executed;
+}
+
+/*
+ * error_code - examines a return value from a syscall execution
+ * and returns an error code if said return value indicates an error.
+ *
+ * Results in the range [-0x1000, -1] are treated as errors and returned
+ * negated; any other result yields 0.
+ */
+static inline int
+error_code(long result) {
+    if (result < 0 && result >= -0x1000)
+        return static_cast<int>(-result);
+
+    return 0;
+}
+
+} // namespace syscall
+} // namespace gkfs
+
+#endif // GKFS_SYSCALL_HPP
diff --git a/include/daemon/env.hpp b/include/daemon/env.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cb75bf547db914e0686081d5115b8e5a403a4782
--- /dev/null
+++ b/include/daemon/env.hpp
@@ -0,0 +1,34 @@
+/*
+  Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain
+  Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany
+
+  This software was partially supported by the
+  EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+
+  This software was partially supported by the
+  ADA-FS project under the SPPEXA project funded by the DFG.
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_DAEMON_ENV +#define GKFS_DAEMON_ENV + +#include + +#define ADD_PREFIX(str) DAEMON_ENV_PREFIX str + +/* Environment variables for the GekkoFS daemon */ +namespace gkfs { +namespace env { + +static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); + +} // namespace env +} // namespace gkfs + +#undef ADD_PREFIX + +#endif // GKFS_DAEMON_ENV + + diff --git a/include/global/configure.hpp.in b/include/global/configure.hpp.in index 94051a741cbf5144960307d40d4b06cbdda6b146..accce2d46def5b412f9f367a126788fc9fbada90 100644 --- a/include/global/configure.hpp.in +++ b/include/global/configure.hpp.in @@ -60,13 +60,14 @@ #define RPC_DIRENTS_BUFF_SIZE (8 * 1024 * 1024) // 8 mega // environment prefixes -#define ENV_PREFIX "GKFS_" +#define CLIENT_ENV_PREFIX "LIBGKFS_" +#define DAEMON_ENV_PREFIX "GKFS_" // Log -#define DEFAULT_PRELOAD_LOG_PATH "/tmp/gkfs_preload.log" +#define DEFAULT_CLIENT_LOG_PATH "/tmp/gkfs_client.log" #define DEFAULT_DAEMON_LOG_PATH "/tmp/gkfs_daemon.log" -#define DEFAULT_PRELOAD_LOG_LEVEL 4 // info +#define DEFAULT_CLIENT_LOG_LEVEL "info,errors,critical,mercury" #define DEFAULT_DAEMON_LOG_LEVEL 4 // info #endif //FS_CONFIGURE_H diff --git a/include/global/env_util.hpp b/include/global/env_util.hpp index 70f26c8784fa52680312fb4511ad81ebfa5885bf..993699b20ec1c216e93e79f25a0b242ca48c4e32 100644 --- a/include/global/env_util.hpp +++ b/include/global/env_util.hpp @@ -11,16 +11,19 @@ SPDX-License-Identifier: MIT */ -#ifndef GKFS_ENV_UTIL_HPP -#define GKFS_ENV_UTIL_HPP +#ifndef GKFS_COMMON_ENV_UTIL_HPP +#define GKFS_COMMON_ENV_UTIL_HPP #include namespace gkfs { +namespace env { -std::string get_env(const std::string& env_name); -std::string get_env_own(const std::string& env_name); +std::string +get_var(const std::string& name, + const std::string& default_value = ""); -} +} // namespace env +} // namespace gkfs -#endif //IFS_ENV_UTIL_HPP +#endif // GKFS_COMMON_ENV_UTIL_HPP diff --git a/scripts/compile_dep.sh 
b/scripts/compile_dep.sh index a3293c2ba588bc4f223f3ef4a004b1d59aeea52e..17847159cd62f63e305b1d4c471aacb4fdec1ab7 100755 --- a/scripts/compile_dep.sh +++ b/scripts/compile_dep.sh @@ -1,8 +1,23 @@ #!/bin/bash +MOGON1_DEPS=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept" "date" +) + +MOGON2_DEPS=( + "zstd" "lz4" "snappy" "bmi" "mercury" "argobots" "margo" "rocksdb" + "syscall_intercept" "date" +) + +FH2_DEPS=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept" "date" +) + usage_short() { echo " -usage: compile_dep.sh [-h] [-n ] [-c ] [-j ] +usage: compile_dep.sh [-h] [-l] [-n ] [-c ] [-d ] [-j ] source_path install_path " } @@ -19,20 +34,52 @@ positional arguments: optional arguments: - -h, --help shows this help message and exits + -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for building and installation -n , --na network layer that is used for communication. Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + build and install a specific dependency. If unspecified + all dependencies are built and installed. -j , --compilecores - number of cores that are used to compile the depdencies + number of cores that are used to compile the dependencies defaults to number of available cores -t, --test Perform libraries tests. " } + +list_dependencies() { + + echo "Available dependencies: " + + echo -n " Mogon 1: " + for d in "${MOGON1_DEPS[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " Mogon 2: " + for d in "${MOGON2_DEPS[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " fh2: " + for d in "${FH2_DEPS[@]}" + do + echo -n "$d " + done + echo "" +} + prepare_build_dir() { if [ !
-d "$1/build" ]; then mkdir $1/build @@ -52,6 +99,7 @@ find_cmake() { PATCH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" PATCH_DIR="${PATCH_DIR}/patches" CLUSTER="" +DEPENDENCY="" NA_LAYER="" CORES="" SOURCE="" @@ -72,6 +120,15 @@ case ${key} in CLUSTER="$2" shift # past argument shift # past value + ;; + -d|--dependency) + if [[ -z "$2" ]]; then + echo "Missing argument for -d/--dependency option" + exit + fi + DEPENDENCY="$2" + shift # past argument + shift # past value ;; -j|--compilecores) CORES="$2" @@ -82,6 +139,10 @@ case ${key} in PERFORM_TEST=true shift ;; + -l|--list-dependencies) + list_dependencies + exit + ;; -h|--help) help_msg exit @@ -160,117 +221,164 @@ export LIBRARY_PATH="${LIBRARY_PATH}:${INSTALL}/lib:${INSTALL}/lib64" # Set cluster dependencies first if [[ ( "${CLUSTER}" == "mogon1" ) || ( "${CLUSTER}" == "fh2" ) || ( "${CLUSTER}" == "mogon2" ) ]]; then + # compile zstd - echo "############################################################ Installing: zstd" - CURR=${SOURCE}/zstd/build/cmake + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "zstd" ) ]]; then + echo "############################################################ Installing: zstd" + CURR=${SOURCE}/zstd/build/cmake + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. 
+ make -j${CORES} + make install + fi + + # build lz4 + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "lz4" ) ]]; then + echo "############################################################ Installing: lz4" + CURR=${SOURCE}/lz4 + cd ${CURR} + make -j${CORES} + make DESTDIR=${INSTALL} PREFIX="" install + fi + + # build snappy + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "snappy" ) ]]; then + echo "############################################################ Installing: snappy" + CURR=${SOURCE}/snappy + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. + make -j${CORES} + make install + fi +fi + +# build bmi +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "bmi" ) ]]; then + if [ "$NA_LAYER" == "bmi" ] || [ "$NA_LAYER" == "all" ]; then + USE_BMI="-DNA_USE_BMI:BOOL=ON" + echo "############################################################ Installing: BMI" + # BMI + CURR=${SOURCE}/bmi + prepare_build_dir ${CURR} + cd ${CURR} + ./prepare + cd ${CURR}/build + CFLAGS="${CFLAGS} -w" ../configure --prefix=${INSTALL} --enable-shared --disable-static --disable-karma --enable-bmi-only --enable-fast --disable-strict + make -j${CORES} + make install + fi +fi + +# build ofi +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "ofi" ) ]]; then + if [ "$NA_LAYER" == "ofi" ] || [ "$NA_LAYER" == "all" ]; then + USE_OFI="-DNA_USE_OFI:BOOL=ON" + # Mogon2 already has libfabric installed in a version that Mercury supports.
+ if [[ ("${CLUSTER}" != "mogon2") ]]; then + echo "############################################################ Installing: LibFabric" + #libfabric + CURR=${SOURCE}/libfabric + prepare_build_dir ${CURR} + cd ${CURR}/build + ../configure --prefix=${INSTALL} --enable-tcp=yes + make -j${CORES} + make install + [ "${PERFORM_TEST}" ] && make check + fi + fi +fi + + +# Mercury +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then + + if [ "$NA_LAYER" == "bmi" ] || [ "$NA_LAYER" == "all" ]; then + USE_BMI="-DNA_USE_BMI:BOOL=ON" + fi + + if [ "$NA_LAYER" == "ofi" ] || [ "$NA_LAYER" == "all" ]; then + USE_OFI="-DNA_USE_OFI:BOOL=ON" + fi + + echo "############################################################ Installing: Mercury" + CURR=${SOURCE}/mercury prepare_build_dir ${CURR} cd ${CURR}/build - $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. + PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig $CMAKE \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + -DBUILD_TESTING:BOOL=ON \ + -DMERCURY_USE_SM_ROUTING:BOOL=ON \ + -DMERCURY_USE_SELF_FORWARD:BOOL=ON \ + -DMERCURY_USE_CHECKSUMS:BOOL=OFF \ + -DMERCURY_USE_BOOST_PP:BOOL=ON \ + -DMERCURY_USE_EAGER_BULK:BOOL=ON \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${INSTALL} \ + ${USE_BMI} ${USE_OFI} \ + .. make -j${CORES} make install - echo "############################################################ Installing: lz4" - CURR=${SOURCE}/lz4 - cd ${CURR} - make -j${CORES} - make DESTDIR=${INSTALL} PREFIX="" install - echo "############################################################ Installing: snappy" - CURR=${SOURCE}/snappy +fi + +# Argobots +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "argobots" ) ]]; then + echo "############################################################ Installing: Argobots" + CURR=${SOURCE}/argobots prepare_build_dir ${CURR} + cd ${CURR} + ./autogen.sh cd ${CURR}/build - $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. 
+ ../configure --prefix=${INSTALL} --enable-perf-opt --disable-checks make -j${CORES} make install + [ "${PERFORM_TEST}" ] && make check fi -if [ "$NA_LAYER" == "bmi" ] || [ "$NA_LAYER" == "all" ]; then - USE_BMI="-DNA_USE_BMI:BOOL=ON" - echo "############################################################ Installing: BMI" - # BMI - CURR=${SOURCE}/bmi +# Margo +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "margo" ) ]]; then + echo "############################################################ Installing: Margo" + CURR=${SOURCE}/margo prepare_build_dir ${CURR} cd ${CURR} - ./prepare + ./prepare.sh cd ${CURR}/build - CFLAGS="${CFLAGS} -w" ../configure --prefix=${INSTALL} --enable-shared --disable-static --disable-karma --enable-bmi-only --enable-fast --disable-strict + ../configure --prefix=${INSTALL} PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig CFLAGS="${CFLAGS} -Wall -O3" make -j${CORES} make install + [ "${PERFORM_TEST}" ] && make check fi -if [ "$NA_LAYER" == "ofi" ] || [ "$NA_LAYER" == "all" ]; then - USE_OFI="-DNA_USE_OFI:BOOL=ON" - # Mogon2 already has libfabric installed in a version that Mercury supports. 
- if [[ ("${CLUSTER}" != "mogon2") ]]; then - echo "############################################################ Installing: LibFabric" - #libfabric - CURR=${SOURCE}/libfabric - prepare_build_dir ${CURR} - cd ${CURR}/build - ../configure --prefix=${INSTALL} --enable-tcp=yes - make -j${CORES} - make install - [ "${PERFORM_TEST}" ] && make check - fi +# Rocksdb +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "rocksdb" ) ]]; then + echo "############################################################ Installing: Rocksdb" + CURR=${SOURCE}/rocksdb + cd ${CURR} + make clean + USE_RTTI=1 make -j${CORES} static_lib + INSTALL_PATH=${INSTALL} make install fi -echo "############################################################ Installing: Mercury" +# syscall_intercept +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "syscall_intercept" ) ]]; then + echo "############################################################ Installing: Syscall_intercept" + CURR=${SOURCE}/syscall_intercept + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Debug -DBUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTS:BOOL=OFF .. + make install +fi -# Mercury -CURR=${SOURCE}/mercury -prepare_build_dir ${CURR} -cd ${CURR}/build -$CMAKE \ - -DCMAKE_BUILD_TYPE:STRING=Release \ - -DBUILD_TESTING:BOOL=ON \ - -DMERCURY_USE_SM_ROUTING:BOOL=ON \ - -DMERCURY_USE_SELF_FORWARD:BOOL=ON \ - -DMERCURY_USE_CHECKSUMS:BOOL=OFF \ - -DMERCURY_USE_BOOST_PP:BOOL=ON \ - -DMERCURY_USE_EAGER_BULK:BOOL=ON \ - -DBUILD_SHARED_LIBS:BOOL=ON \ - -DCMAKE_INSTALL_PREFIX=${INSTALL} \ - ${USE_BMI} ${USE_OFI} \ - ..
-make -j${CORES} -make install - -echo "############################################################ Installing: Argobots" +# date +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "date" ) ]]; then + echo "############################################################ Installing: date" + CURR=${SOURCE}/date + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_CXX_STANDARD:STRING=14 -DUSE_SYSTEM_TZ_DB:BOOL=ON -DBUILD_SHARED_LIBS:BOOL=ON .. + make install +fi -# Argobots -CURR=${SOURCE}/argobots -prepare_build_dir ${CURR} -cd ${CURR} -./autogen.sh -cd ${CURR}/build -../configure --prefix=${INSTALL} --enable-perf-opt --disable-checks -make -j${CORES} -make install -[ "${PERFORM_TEST}" ] && make check - -echo "############################################################ Installing: Margo" -# Margo -CURR=${SOURCE}/margo -prepare_build_dir ${CURR} -cd ${CURR} -./prepare.sh -cd ${CURR}/build -../configure --prefix=${INSTALL} PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig CFLAGS="${CFLAGS} -Wall -O3" -make -j${CORES} -make install -[ "${PERFORM_TEST}" ] && make check - -echo "############################################################ Installing: Rocksdb" -# Rocksdb -CURR=${SOURCE}/rocksdb -cd ${CURR} -make clean -USE_RTTI=1 make -j${CORES} static_lib -INSTALL_PATH=${INSTALL} make install - -echo "############################################################ Installing: Syscall_intercept" -CURR=${SOURCE}/syscall_intercept -prepare_build_dir ${CURR} -cd ${CURR}/build -$CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release -DBUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTS:BOOK=OFF .. 
-make install echo "Done" diff --git a/scripts/dl_dep.sh b/scripts/dl_dep.sh index 6700ca07e7393e1275eb2de84948386efc80c20a..bcfbe0b365c8e33124d02409cbc48250bd271b21 100755 --- a/scripts/dl_dep.sh +++ b/scripts/dl_dep.sh @@ -4,6 +4,27 @@ COMMON_CURL_FLAGS="--silent --fail --show-error --location -O" COMMON_GIT_FLAGS="--quiet --single-branch" +PATCH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PATCH_DIR="${PATCH_DIR}/patches" +CLUSTER="" +DEPENDENCY="" +NA_LAYER="" + +MOGON1_DEPS=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept" "date" +) + +MOGON2_DEPS=( + "zstd" "lz4" "snappy" "bmi" "mercury" "argobots" "margo" "rocksdb" + "syscall_intercept" "date" +) + +FH2_DEPS=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept" "date" +) + # Stop all backround jobs on interruption. # "kill -- -$$" sends a SIGTERM to the whole process group, @@ -23,14 +44,41 @@ error_exit() { exit "${2:-1}" ## Return a code specified by $2 or 1 by default. } +list_dependencies() { + + echo "Available dependencies: " + + echo -n " Mogon 1: " + for d in "${MOGON1_DEPS[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " Mogon 2: " + for d in "${MOGON2_DEPS[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " fh2: " + for d in "${FH2_DEPS[@]}" + do + echo -n "$d " + done + echo "" +} + clonedeps() { - set -e + set -ex trap exit_child EXIT local FOLDER=$1 local REPO=$2 local COMMIT=$3 local GIT_FLAGS=$4 + local PATCH=$5 local ACTION @@ -44,6 +92,12 @@ clonedeps() { # fix the version cd "${SOURCE}/${FOLDER}" && git checkout -qf ${COMMIT} echo "${ACTION} ${FOLDER} [$COMMIT]" + + # apply patch if provided + if [ !
-z ${PATCH} ]; then + git apply --verbose ${PATCH_DIR}/${PATCH} + fi + } wgetdeps() { @@ -69,7 +123,7 @@ wgetdeps() { usage_short() { echo " -usage: dl_dep.sh [-h] [-n ] [-c ] +usage: dl_dep.sh [-h] [-l] [-n ] [-c ] [-d ] source_path " } @@ -86,16 +140,19 @@ positional arguments: optional arguments: -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for download -n , --na network layer that is used for communication. Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + download a specific dependency. If unspecified + all dependencies are downloaded. " } -CLUSTER="" -NA_LAYER="" POSITIONAL=() while [[ $# -gt 0 ]] @@ -113,6 +170,19 @@ case ${key} in shift # past argument shift # past value ;; + -d|--dependency) + if [[ -z "$2" ]]; then + echo "Missing argument for -d/--dependency option" + exit + fi + DEPENDENCY="$2" + shift # past argument + shift # past value + ;; + -l|--list-dependencies) + list_dependencies + exit + ;; -h|--help) help_msg exit @@ -166,38 +236,72 @@ mkdir -p ${SOURCE} # get cluster dependencies if [[ ( "${CLUSTER}" == "mogon1" ) || ( "${CLUSTER}" == "mogon2" ) || ( "${CLUSTER}" == "fh2" ) ]]; then + # get zstd for fast compression in rocksdb - wgetdeps "zstd" "https://github.com/facebook/zstd/archive/v1.3.2.tar.gz" & + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "zstd" ) ]]; then + wgetdeps "zstd" "https://github.com/facebook/zstd/archive/v1.3.2.tar.gz" & + fi + # get zlib for rocksdb - wgetdeps "lz4" "https://github.com/lz4/lz4/archive/v1.8.0.tar.gz" & + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "lz4" ) ]]; then + wgetdeps "lz4" "https://github.com/lz4/lz4/archive/v1.8.0.tar.gz" & + fi + # get snappy for rocksdb - wgetdeps "snappy" "https://github.com/google/snappy/archive/1.1.7.tar.gz" & + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" ==
"snappy" ) ]]; then + wgetdeps "snappy" "https://github.com/google/snappy/archive/1.1.7.tar.gz" & + fi fi #if [ "${CLUSTER}" == "fh2" ]; then # no distinct 3rd party software needed as of now. #fi # get BMI -if [ "${NA_LAYER}" == "bmi" ] || [ "${NA_LAYER}" == "all" ]; then - clonedeps "bmi" "https://xgitlab.cels.anl.gov/sds/bmi.git" "81ad0575fc57a69269a16208417cbcbefa51f9ea" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "bmi" ) ]]; then + if [ "${NA_LAYER}" == "bmi" ] || [ "${NA_LAYER}" == "all" ]; then + clonedeps "bmi" "https://xgitlab.cels.anl.gov/sds/bmi.git" "81ad0575fc57a69269a16208417cbcbefa51f9ea" & + fi fi + # get libfabric -if [ "${NA_LAYER}" == "ofi" ] || [ "${NA_LAYER}" == "all" ]; then - # No need to get libfabric for mogon2 as it is already installed - if [[ ("${CLUSTER}" != "mogon2") ]]; then - wgetdeps "libfabric" "https://github.com/ofiwg/libfabric/releases/download/v1.7.2/libfabric-1.7.2.tar.gz" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "ofi" ) ]]; then + if [ "${NA_LAYER}" == "ofi" ] || [ "${NA_LAYER}" == "all" ]; then + # No need to get libfabric for mogon2 as it is already installed + if [[ ("${CLUSTER}" != "mogon2") ]]; then + wgetdeps "libfabric" "https://github.com/ofiwg/libfabric/releases/download/v1.7.2/libfabric-1.7.2.tar.gz" & + fi fi fi + # get Mercury -clonedeps "mercury" "https://github.com/mercury-hpc/mercury" "9906f25b6f9c52079d57006f199b3ea47960c435" "--recurse-submodules" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then + clonedeps "mercury" "https://github.com/mercury-hpc/mercury" "9906f25b6f9c52079d57006f199b3ea47960c435" "--recurse-submodules" & +fi + # get Argobots -wgetdeps "argobots" "https://github.com/pmodels/argobots/archive/v1.0rc1.tar.gz" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "argobots" ) ]]; then + wgetdeps "argobots" "https://github.com/pmodels/argobots/archive/v1.0rc1.tar.gz" & +fi + # get Margo -clonedeps "margo" 
"https://xgitlab.cels.anl.gov/sds/margo.git" "6ed94e4f3a4d526b0a3b4e57be075461e86d3666" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "margo" ) ]]; then + clonedeps "margo" "https://xgitlab.cels.anl.gov/sds/margo.git" "6ed94e4f3a4d526b0a3b4e57be075461e86d3666" & +fi + # get rocksdb -wgetdeps "rocksdb" "https://github.com/facebook/rocksdb/archive/v6.1.2.tar.gz" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "rocksdb" ) ]]; then + wgetdeps "rocksdb" "https://github.com/facebook/rocksdb/archive/v6.1.2.tar.gz" & +fi + # get syscall_intercept -clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "syscall_intercept" ) ]]; then + clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" "" "syscall_intercept.patch" & +fi + +# get date +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "date" ) ]]; then + clonedeps "date" "https://github.com/HowardHinnant/date.git" "e7e1482087f58913b80a20b04d5c58d9d6d90155" & +fi # Wait for all download to be completed wait diff --git a/scripts/patches/syscall_intercept.patch b/scripts/patches/syscall_intercept.patch new file mode 100644 index 0000000000000000000000000000000000000000..00c7e26744dbc97703c3f6292f0dfed6046d5109 --- /dev/null +++ b/scripts/patches/syscall_intercept.patch @@ -0,0 +1,131 @@ +diff --git a/include/libsyscall_intercept_hook_point.h b/include/libsyscall_intercept_hook_point.h +index 2fe7d57..43a8974 100644 +--- a/include/libsyscall_intercept_hook_point.h ++++ b/include/libsyscall_intercept_hook_point.h +@@ -56,9 +56,18 @@ extern int (*intercept_hook_point)(long syscall_number, + long arg2, long arg3, + long arg4, long arg5, + long *result); +- +-extern void (*intercept_hook_point_clone_child)(void); +-extern void (*intercept_hook_point_clone_parent)(long pid); ++extern void 
(*intercept_hook_point_clone_child)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, long newtls); ++extern void (*intercept_hook_point_clone_parent)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, long newtls, ++ long returned_pid); ++extern void (*intercept_hook_point_post_kernel)(long syscall_number, ++ long arg0, long arg1, ++ long arg2, long arg3, ++ long arg4, long arg5, ++ long result); + + /* + * syscall_no_intercept - syscall without interception +diff --git a/src/intercept.c b/src/intercept.c +index 41fd95d..c0cd865 100644 +--- a/src/intercept.c ++++ b/src/intercept.c +@@ -67,9 +67,23 @@ int (*intercept_hook_point)(long syscall_number, + long *result) + __attribute__((visibility("default"))); + +-void (*intercept_hook_point_clone_child)(void) ++void (*intercept_hook_point_clone_child)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, ++ long newtls) + __attribute__((visibility("default"))); +-void (*intercept_hook_point_clone_parent)(long) ++ ++void (*intercept_hook_point_clone_parent)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, ++ long newtls, long returned_pid) ++ __attribute__((visibility("default"))); ++ ++void (*intercept_hook_point_post_kernel)(long syscall_number, ++ long arg0, long arg1, ++ long arg2, long arg3, ++ long arg4, long arg5, ++ long result) + __attribute__((visibility("default"))); + + bool debug_dumps_on; +@@ -655,6 +669,21 @@ intercept_routine(struct context *context) + desc.args[3], + desc.args[4], + desc.args[5]); ++ ++ /* ++ * some users might want to execute code after a syscall has ++ * been forwarded to the kernel (for example, to check its ++ * return value). 
++ */ ++ if (intercept_hook_point_post_kernel != NULL) ++ intercept_hook_point_post_kernel(desc.nr, ++ desc.args[0], ++ desc.args[1], ++ desc.args[2], ++ desc.args[3], ++ desc.args[4], ++ desc.args[5], ++ result); + } + + intercept_log_syscall(patch, &desc, KNOWN, result); +@@ -670,12 +699,26 @@ intercept_routine(struct context *context) + struct wrapper_ret + intercept_routine_post_clone(struct context *context) + { ++ struct syscall_desc desc; ++ get_syscall_in_context(context, &desc); ++ + if (context->rax == 0) { + if (intercept_hook_point_clone_child != NULL) +- intercept_hook_point_clone_child(); ++ intercept_hook_point_clone_child( ++ (unsigned long)desc.args[0], ++ (void *)desc.args[1], ++ (int *)desc.args[2], ++ (int *)desc.args[3], ++ desc.args[4]); + } else { + if (intercept_hook_point_clone_parent != NULL) +- intercept_hook_point_clone_parent(context->rax); ++ intercept_hook_point_clone_parent( ++ (unsigned long)desc.args[0], ++ (void *)desc.args[1], ++ (int *)desc.args[2], ++ (int *)desc.args[3], ++ desc.args[4], ++ context->rax); + } + + return (struct wrapper_ret){.rax = context->rax, .rdx = 1 }; +diff --git a/test/test_clone_thread_preload.c b/test/test_clone_thread_preload.c +index c7663a2..bff239e 100644 +--- a/test/test_clone_thread_preload.c ++++ b/test/test_clone_thread_preload.c +@@ -96,8 +96,18 @@ hook(long syscall_number, + * of the clone syscall. 
+ */ + static void +-hook_child(void) ++hook_child(unsigned long clone_flags, ++ void *child_stack, ++ int *ptid, ++ int *ctid, ++ long newtls) + { ++ (void) clone_flags; ++ (void) child_stack; ++ (void) ptid; ++ (void) ctid; ++ (void) newtls; ++ + static const char msg[] = "clone_hook_child called\n"; + + assert(flags != -1); diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 62a1bd1acd189ffe580b3e03afb1cffe8e4e139a..5ea177e7f0c018b6be218a763803e5820a7ce242 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -8,11 +8,14 @@ set(PRELOAD_SRC preload.cpp resolve.cpp preload_util.cpp + rpc/hg_rpcs.cpp rpc/ld_rpc_management.cpp rpc/ld_rpc_data_ws.cpp rpc/ld_rpc_metadentry.cpp ../global/rpc/rpc_utils.cpp ../global/path_util.cpp + logging.cpp + syscalls/detail/syscall_info.c ) set(PRELOAD_HEADERS ../../include/global/configure.hpp @@ -24,16 +27,25 @@ set(PRELOAD_HEADERS ../../include/client/preload_context.hpp ../../include/client/adafs_functions.hpp ../../include/client/intercept.hpp - ../../include/client/syscall_names.hpp ../../include/client/hooks.hpp ../../include/client/open_file_map.hpp ../../include/client/open_dir.hpp ../../include/client/preload.hpp ../../include/client/resolve.hpp ../../include/client/preload_util.hpp + ../../include/client/rpc/hg_rpcs.hpp ../../include/client/rpc/ld_rpc_management.hpp ../../include/client/rpc/ld_rpc_data_ws.hpp ../../include/client/rpc/ld_rpc_metadentry.hpp + ../../include/client/logging.hpp + ../../include/client/env.hpp + ../../include/client/make_array.hpp + ../../include/client/syscalls/args.hpp + ../../include/client/syscalls/decoder.hpp + ../../include/client/syscalls/errno.hpp + ../../include/client/syscalls/rets.hpp + ../../include/client/syscalls/syscall.hpp + ../../include/client/syscalls/detail/syscall_info.h ) add_library(gkfs_intercept SHARED ${PRELOAD_SRC} ${PRELOAD_HEADERS}) @@ -42,16 +54,16 @@ target_link_libraries(gkfs_intercept # internal metadata distributor - 
log_util env_util # external Syscall_intercept::Syscall_intercept dl - ${ABT_LIBRARIES} mercury - ${MARGO_LIBRARIES} + hermes + fmt Boost::boost # needed for tokenizer header Threads::Threads + Date::TZ ) target_include_directories(gkfs_intercept diff --git a/src/client/adafs_functions.cpp b/src/client/adafs_functions.cpp index 2331aeab1b475185bc2469d41f0ebf6d87fed6dc..f006442c3ec7443f8bd8abb70a8944acf6852a62 100644 --- a/src/client/adafs_functions.cpp +++ b/src/client/adafs_functions.cpp @@ -18,13 +18,12 @@ #include #include #include "client/preload_util.hpp" +#include #include #include #include #include -#include - #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) @@ -37,18 +36,26 @@ struct linux_dirent { char d_name[1]; }; +struct linux_dirent64 { + unsigned long long d_ino; + unsigned long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[1]; +}; + using namespace std; int adafs_open(const std::string& path, mode_t mode, int flags) { if(flags & O_PATH){ - CTX->log()->error("{}() `O_PATH` flag is not supported", __func__); + LOG(ERROR, "`O_PATH` flag is not supported"); errno = ENOTSUP; return -1; } if(flags & O_APPEND){ - CTX->log()->error("{}() `O_APPEND` flag is not supported", __func__); + LOG(ERROR, "`O_APPEND` flag is not supported"); errno = ENOTSUP; return -1; } @@ -59,7 +66,7 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { if(errno == ENOENT) { exists = false; } else { - CTX->log()->error("{}() error while retriving stat to file", __func__); + LOG(ERROR, "Error while retriving stat to file"); return -1; } } @@ -75,14 +82,14 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { assert(flags & O_CREAT); if(flags & O_DIRECTORY){ - CTX->log()->error("{}() O_DIRECTORY use with O_CREAT. NOT SUPPORTED", __func__); + LOG(ERROR, "O_DIRECTORY use with O_CREAT. 
NOT SUPPORTED"); errno = ENOTSUP; return -1; } // no access check required here. If one is using our FS they have the permissions. if(adafs_mk_node(path, mode | S_IFREG)) { - CTX->log()->error("{}() error creating non-existent file", __func__); + LOG(ERROR, "Error creating non-existent file"); return -1; } } else { @@ -97,7 +104,7 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { #ifdef HAS_SYMLINKS if (md->is_link()) { if (flags & O_NOFOLLOW) { - CTX->log()->warn("{}() symlink found and O_NOFOLLOW flag was specified", __func__); + LOG(WARNING, "Symlink found and O_NOFOLLOW flag was specified"); errno = ELOOP; return -1; } @@ -115,7 +122,7 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { if( (flags & O_TRUNC) && ((flags & O_RDWR) || (flags & O_WRONLY)) ) { if(adafs_truncate(path, md->size(), 0)) { - CTX->log()->error("{}() error truncating file", __func__); + LOG(ERROR, "Error truncating file"); return -1; } } @@ -138,11 +145,11 @@ int adafs_mk_node(const std::string& path, mode_t mode) { case S_IFBLK: case S_IFIFO: case S_IFSOCK: - CTX->log()->warn("{}() unsupported node type", __func__); + LOG(WARNING, "Unsupported node type"); errno = ENOTSUP; return -1; default: - CTX->log()->warn("{}() unrecognized node type", __func__); + LOG(WARNING, "Unrecognized node type"); errno = EINVAL; return -1; } @@ -150,12 +157,12 @@ int adafs_mk_node(const std::string& path, mode_t mode) { auto p_comp = dirname(path); auto md = adafs_metadata(p_comp); if (!md) { - CTX->log()->debug("{}() parent component does not exists: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component does not exist: '{}'", p_comp); errno = ENOENT; return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() parent component is not a direcotory: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component is not a directory: '{}'", p_comp); errno = ENOTDIR; return -1; } @@ -173,7 +180,7 @@ int adafs_rm_node(const std::string& path) { return -1; } bool has_data = 
S_ISREG(md->mode()) && (md->size() != 0); - return rpc_send::rm_node(path, !has_data); + return rpc_send::rm_node(path, !has_data, md->size()); } int adafs_access(const std::string& path, const int mask, bool follow_links) { @@ -216,7 +223,6 @@ std::shared_ptr adafs_metadata(const string& path, bool follow_links) } int adafs_statfs(struct statfs* buf) { - CTX->log()->trace("{}() called", __func__); auto blk_stat = rpc_send::chunk_stat(); buf->f_type = 0; buf->f_bsize = blk_stat.chunk_size; @@ -258,15 +264,12 @@ off_t adafs_lseek(unsigned int fd, off_t offset, unsigned int whence) { off_t adafs_lseek(shared_ptr adafs_fd, off_t offset, unsigned int whence) { switch (whence) { case SEEK_SET: - CTX->log()->debug("{}() whence is SEEK_SET", __func__); adafs_fd->pos(offset); break; case SEEK_CUR: - CTX->log()->debug("{}() whence is SEEK_CUR", __func__); adafs_fd->pos(adafs_fd->pos() + offset); break; case SEEK_END: { - CTX->log()->debug("{}() whence is SEEK_END", __func__); off64_t file_size; auto err = rpc_send::get_metadentry_size(adafs_fd->path(), file_size); if (err < 0) { @@ -277,17 +280,17 @@ off_t adafs_lseek(shared_ptr adafs_fd, off_t offset, unsigned int when break; } case SEEK_DATA: - CTX->log()->warn("{}() SEEK_DATA whence is not supported", __func__); + LOG(WARNING, "SEEK_DATA whence is not supported"); // We do not support this whence yet errno = EINVAL; return -1; case SEEK_HOLE: - CTX->log()->warn("{}() SEEK_HOLE whence is not supported", __func__); + LOG(WARNING, "SEEK_HOLE whence is not supported"); // We do not support this whence yet errno = EINVAL; return -1; default: - CTX->log()->warn("{}() unknown whence {}", __func__, whence); + LOG(WARNING, "Unknown whence value {:#x}", whence); errno = EINVAL; return -1; } @@ -303,12 +306,12 @@ int adafs_truncate(const std::string& path, off_t old_size, off_t new_size) { } if (rpc_send::decr_size(path, new_size)) { - CTX->log()->debug("{}() failed to decrease size", __func__); + LOG(DEBUG, "Failed to decrease 
size"); return -1; } if(rpc_send::trunc_data(path, old_size, new_size)){ - CTX->log()->debug("{}() failed to truncate data", __func__); + LOG(DEBUG, "Failed to truncate data"); return -1; } return 0; @@ -324,7 +327,7 @@ int adafs_truncate(const std::string& path, off_t length) { * length increased. */ if(length < 0) { - CTX->log()->debug("{}() length is negative: {}", __func__, length); + LOG(DEBUG, "Length is negative: {}", length); errno = EINVAL; return -1; } @@ -335,8 +338,7 @@ int adafs_truncate(const std::string& path, off_t length) { } auto size = md->size(); if(static_cast(length) > size) { - CTX->log()->debug("{}() length is greater then file size: {} > {}", - __func__, length, size); + LOG(DEBUG, "Length is greater then file size: {} > {}", length, size); errno = EINVAL; return -1; } @@ -354,24 +356,23 @@ int adafs_dup2(const int oldfd, const int newfd) { ssize_t adafs_pwrite(std::shared_ptr file, const char * buf, size_t count, off64_t offset) { if (file->type() != FileType::regular) { assert(file->type() == FileType::directory); - CTX->log()->warn("{}() cannot read from directory", __func__); + LOG(WARNING, "Cannot read from directory"); errno = EISDIR; return -1; } auto path = make_shared(file->path()); - CTX->log()->trace("{}() count: {}, offset: {}", __func__, count, offset); auto append_flag = file->get_flag(OpenFile_flags::append); ssize_t ret = 0; long updated_size = 0; ret = rpc_send::update_metadentry_size(*path, count, offset, append_flag, updated_size); if (ret != 0) { - CTX->log()->error("{}() update_metadentry_size failed with ret {}", __func__, ret); + LOG(ERROR, "update_metadentry_size() failed with ret {}", ret); return ret; // ERR } ret = rpc_send::write(*path, buf, append_flag, offset, count, updated_size); if (ret < 0) { - CTX->log()->warn("{}() rpc_send::write failed with ret {}", __func__, ret); + LOG(WARNING, "rpc_send::write() failed with ret {}", ret); } return ret; // return written size or -1 as error } @@ -400,8 +401,6 @@ 
ssize_t adafs_write(int fd, const void * buf, size_t count) { } ssize_t adafs_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset) { - CTX->log()->trace("{}() called with fd {}, op num {}, offset {}", - __func__, fd, iovcnt, offset); auto file = CTX->file_map()->get(fd); auto pos = offset; // keep truck of current position @@ -432,8 +431,7 @@ ssize_t adafs_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset) } ssize_t adafs_writev(int fd, const struct iovec * iov, int iovcnt) { - CTX->log()->trace("{}() called with fd {}, ops num {}", - __func__, fd, iovcnt); + auto adafs_fd = CTX->file_map()->get(fd); auto pos = adafs_fd->pos(); // retrieve the current offset auto ret = adafs_pwritev(fd, iov, iovcnt, pos); @@ -448,18 +446,18 @@ ssize_t adafs_writev(int fd, const struct iovec * iov, int iovcnt) { ssize_t adafs_pread(std::shared_ptr file, char * buf, size_t count, off64_t offset) { if (file->type() != FileType::regular) { assert(file->type() == FileType::directory); - CTX->log()->warn("{}() cannot read from directory", __func__); + LOG(WARNING, "Cannot read from directory"); errno = EISDIR; return -1; } - CTX->log()->trace("{}() count: {}, offset: {}", __func__, count, offset); + // Zeroing buffer before read is only relevant for sparse files. Otherwise sparse regions contain invalid data. 
#if defined(ZERO_BUFFER_BEFORE_READ) memset(buf, 0, sizeof(char)*count); #endif auto ret = rpc_send::read(file->path(), buf, offset, count); if (ret < 0) { - CTX->log()->warn("{}() rpc_send::read failed with ret {}", __func__, ret); + LOG(WARNING, "rpc_send::read() failed with ret {}", ret); } // XXX check that we don't try to read past end of the file return ret; // return read size or -1 as error @@ -488,7 +486,7 @@ int adafs_opendir(const std::string& path) { return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() path is not a directory", __func__); + LOG(DEBUG, "Path is not a directory"); errno = ENOTDIR; return -1; } @@ -501,12 +499,12 @@ int adafs_opendir(const std::string& path) { int adafs_rmdir(const std::string& path) { auto md = adafs_metadata(path); if (!md) { - CTX->log()->debug("{}() path does not exists: '{}'", __func__, path); + LOG(DEBUG, "Path '{}' does not exist: ", path); errno = ENOENT; return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() path is not a directory", __func__); + LOG(DEBUG, "Path '{}' is not a directory", path); errno = ENOTDIR; return -1; } @@ -517,14 +515,14 @@ int adafs_rmdir(const std::string& path) { errno = ENOTEMPTY; return -1; } - return rpc_send::rm_node(path, true); + return rpc_send::rm_node(path, true, 0); } int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count) { - CTX->log()->trace("{}() called on fd: {}, count {}", __func__, fd, count); + auto open_dir = CTX->file_map()->get_dir(fd); if(open_dir == nullptr){ //Cast did not succeeded: open_file is a regular file @@ -557,7 +555,7 @@ int getdents(unsigned int fd, *(reinterpret_cast(current_dirp) + total_size - 1) = ((de.type() == FileType::regular)? 
DT_REG : DT_DIR); - CTX->log()->trace("{}() name {}: {}", __func__, pos, de.name()); + LOG(DEBUG, "name {}: {}", pos, de.name()); std::strcpy(&(current_dirp->d_name[0]), de.name().c_str()); ++pos; current_dirp->d_off = pos; @@ -572,6 +570,59 @@ int getdents(unsigned int fd, return written; } + +int getdents64(unsigned int fd, + struct linux_dirent64 *dirp, + unsigned int count) { + + auto open_dir = CTX->file_map()->get_dir(fd); + if(open_dir == nullptr){ + //Cast did not succeeded: open_file is a regular file + errno = EBADF; + return -1; + } + + auto pos = open_dir->pos(); + if (pos >= open_dir->size()) { + return 0; + } + + unsigned int written = 0; + struct linux_dirent64 * current_dirp = nullptr; + while(pos < open_dir->size()) { + DirEntry de = open_dir->getdent(pos); + auto total_size = ALIGN(offsetof(struct linux_dirent64, d_name) + + de.name().size() + 3, sizeof(long)); + if (total_size > (count - written)) { + //no enough space left on user buffer to insert next dirent + break; + } + current_dirp = reinterpret_cast( + reinterpret_cast(dirp) + written); + current_dirp->d_ino = std::hash()( + open_dir->path() + "/" + de.name()); + + current_dirp->d_reclen = total_size; + current_dirp->d_type = ((de.type() == FileType::regular)? DT_REG : DT_DIR); + + + + LOG(DEBUG, "name {}: {}", pos, de.name()); + std::strcpy(&(current_dirp->d_name[0]), de.name().c_str()); + ++pos; + current_dirp->d_off = pos; + written += total_size; + } + + if (written == 0) { + errno = EINVAL; + return -1; + } + open_dir->pos(pos); + return written; +} + + #ifdef HAS_SYMLINKS int adafs_mk_symlink(const std::string& path, const std::string& target_path) { @@ -586,7 +637,7 @@ int adafs_mk_symlink(const std::string& path, const std::string& target_path) { auto trg_mode = target_md->mode(); if (!(S_ISREG(trg_mode) || S_ISLNK(trg_mode))) { assert(S_ISDIR(trg_mode)); - CTX->log()->debug("{}() target path is a directory. Not supported", __func__); + LOG(DEBUG, "Target path is a directory. 
Not supported"); errno = ENOTSUP; return -1; } @@ -595,19 +646,19 @@ int adafs_mk_symlink(const std::string& path, const std::string& target_path) { auto p_comp = dirname(path); auto md = adafs_metadata(p_comp, false); if (md == nullptr) { - CTX->log()->debug("{}() parent component does not exist: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component does not exist: '{}'", p_comp); errno = ENOENT; return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() parent component is not a directory: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component is not a directory: '{}'", p_comp); errno = ENOTDIR; return -1; } auto link_md = adafs_metadata(path, false); if (link_md != nullptr) { - CTX->log()->debug("{}() Link exists: '{}'", __func__, p_comp); + LOG(DEBUG, "Link exists: '{}'", p_comp); errno = EEXIST; return -1; } @@ -619,17 +670,17 @@ int adafs_readlink(const std::string& path, char *buf, int bufsize) { init_ld_env_if_needed(); auto md = adafs_metadata(path, false); if (md == nullptr) { - CTX->log()->debug("{}() named link doesn't exists", __func__); + LOG(DEBUG, "Named link doesn't exist"); return -1; } if (!(md->is_link())) { - CTX->log()->debug("{}() The named file is not a symbolic link", __func__); + LOG(DEBUG, "The named file is not a symbolic link"); errno = EINVAL; return -1; } int path_size = md->target_path().size() + CTX->mountdir().size(); if (path_size >= bufsize) { - CTX->log()->warn("{}() destination buffer size is to short: {} < {}, {} ", __func__, bufsize, path_size, md->target_path()); + LOG(WARNING, "Destination buffer size is too short: {} < {}, {} ", bufsize, path_size, md->target_path()); errno = ENAMETOOLONG; return -1; } diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 79e7125d616c94c2103ccfe5b9f8801274602e24..2f30cb70fd8450720d4411d1620acb7250184a24 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -13,6 +13,8 @@ #include "client/hooks.hpp" #include "client/preload.hpp" +#include "client/logging.hpp" 
+ #include "client/adafs_functions.hpp" #include "client/resolve.hpp" #include "client/open_dir.hpp" @@ -30,8 +32,8 @@ static inline int with_errno(int ret) { int hook_openat(int dirfd, const char *cpath, int flags, mode_t mode) { - CTX->log()->trace("{}() called with fd: {}, path: {}, flags: {}, mode: {}", - __func__, dirfd, cpath, flags, mode); + LOG(DEBUG, "{}() called with fd: {}, path: \"{}\", flags: {}, mode: {}", + __func__, dirfd, cpath, flags, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -49,23 +51,35 @@ int hook_openat(int dirfd, const char *cpath, int flags, mode_t mode) { return with_errno(adafs_open(resolved, mode, flags)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_close(int fd) { - CTX->log()->trace("{}() called with fd {}", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}", __func__, fd); + if(CTX->file_map()->exist(fd)) { // No call to the daemon is required CTX->file_map()->remove(fd); return 0; } + + if(CTX->is_internal_fd(fd)) { + // the client application (for some reason) is trying to close an + // internal fd: ignore it + return 0; + } + return syscall_no_intercept(SYS_close, fd); } int hook_stat(const char* path, struct stat* buf) { - CTX->log()->trace("{}() called with path '{}'", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", + __func__, path, fmt::ptr(buf)); + std::string rel_path; if (CTX->relativize_path(path, rel_path, false)) { return with_errno(adafs_stat(rel_path, buf)); @@ -74,7 +88,10 @@ int hook_stat(const char* path, struct stat* buf) { } int hook_lstat(const char* path, struct stat* buf) { - CTX->log()->trace("{}() called with path '{}'", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", + __func__, path, fmt::ptr(buf)); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { return 
with_errno(adafs_stat(rel_path, buf)); @@ -83,7 +100,10 @@ int hook_lstat(const char* path, struct stat* buf) { } int hook_fstat(unsigned int fd, struct stat* buf) { - CTX->log()->trace("{}() called with fd '{}'", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}", + __func__, fd, fmt::ptr(buf)); + if (CTX->file_map()->exist(fd)) { auto path = CTX->file_map()->get(fd)->path(); return with_errno(adafs_stat(path, buf)); @@ -92,10 +112,12 @@ int hook_fstat(unsigned int fd, struct stat* buf) { } int hook_fstatat(int dirfd, const char * cpath, struct stat * buf, int flags) { - CTX->log()->trace("{}() called with path '{}' and fd {}", __func__, cpath, dirfd); + + LOG(DEBUG, "{}() called with path: \"{}\", fd: {}, buf: {}, flags: {}", + __func__, cpath, dirfd, fmt::ptr(buf), flags); if(flags & AT_EMPTY_PATH) { - CTX->log()->error("{}() AT_EMPTY_PATH flag not supported", __func__); + LOG(ERROR, "{}() AT_EMPTY_PATH flag not supported", __func__); return -ENOTSUP; } @@ -115,13 +137,16 @@ int hook_fstatat(int dirfd, const char * cpath, struct stat * buf, int flags) { return with_errno(adafs_stat(resolved, buf)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_read(unsigned int fd, void* buf, size_t count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, buf: {} count: {}", + __func__, fd, fmt::ptr(buf), count); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_read(fd, buf, count)); } @@ -129,8 +154,10 @@ int hook_read(unsigned int fd, void* buf, size_t count) { } int hook_pread(unsigned int fd, char * buf, size_t count, loff_t pos) { - CTX->log()->trace("{}() called with fd {}, count {}, pos {}", - __func__, fd, count, pos); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count: {}, pos: {}", + __func__, fd, fmt::ptr(buf), count, pos); + if 
(CTX->file_map()->exist(fd)) { return with_errno(adafs_pread_ws(fd, buf, count, pos)); } @@ -139,7 +166,10 @@ int hook_pread(unsigned int fd, char * buf, size_t count, loff_t pos) { } int hook_write(unsigned int fd, const char * buf, size_t count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count {}", + __func__, fd, fmt::ptr(buf), count); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_write(fd, buf, count)); } @@ -147,8 +177,10 @@ int hook_write(unsigned int fd, const char * buf, size_t count) { } int hook_pwrite(unsigned int fd, const char * buf, size_t count, loff_t pos) { - CTX->log()->trace("{}() called with fd {}, count {}, pos {}", - __func__, fd, count, pos); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count: {}, pos: {}", + __func__, fd, fmt::ptr(buf), count, pos); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_pwrite_ws(fd, buf, count, pos)); } @@ -157,7 +189,10 @@ int hook_pwrite(unsigned int fd, const char * buf, size_t count, loff_t pos) { } int hook_writev(unsigned long fd, const struct iovec * iov, unsigned long iovcnt) { - CTX->log()->trace("{}() called with fd {}, ops_num {}", __func__, fd, iovcnt); + + LOG(DEBUG, "{}() called with fd: {}, iov: {}, iovcnt: {}", + __func__, fd, fmt::ptr(iov), iovcnt); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_writev(fd, iov, iovcnt)); } @@ -166,21 +201,25 @@ int hook_writev(unsigned long fd, const struct iovec * iov, unsigned long iovcnt int hook_pwritev(unsigned long fd, const struct iovec * iov, unsigned long iovcnt, unsigned long pos_l, unsigned long pos_h) { - CTX->log()->trace("{}() called with fd {}, ops_num {}, low position {}," - "high postion {}", __func__, fd, iovcnt, pos_l, pos_h); + + LOG(DEBUG, "{}() called with fd: {}, iov: {}, iovcnt: {}, " + "pos_l: {}," "pos_h: {}", + __func__, fd, fmt::ptr(iov), iovcnt, pos_l, pos_h); + if (CTX->file_map()->exist(fd)) { - 
CTX->log()->warn("{}() Not supported", __func__); + LOG(WARNING, "{}() Not supported", __func__); return -ENOTSUP; } return syscall_no_intercept(SYS_pwritev, fd, iov, iovcnt); } int hook_unlinkat(int dirfd, const char * cpath, int flags) { - CTX->log()->trace("{}() called with path '{}' dirfd {}, flags {}", - __func__, cpath, dirfd, flags); + + LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", flags: {}", + __func__, dirfd, cpath, flags); if ((flags & ~AT_REMOVEDIR) != 0) { - CTX->log()->error("{}() Flags unknown: {}", __func__, flags); + LOG(ERROR, "{}() Flags unknown: {}", __func__, flags); return -EINVAL; } @@ -204,18 +243,19 @@ int hook_unlinkat(int dirfd, const char * cpath, int flags) { } default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_symlinkat(const char * oldname, int newdfd, const char * newname) { - CTX->log()->trace("{}() called with oldname '{}', new fd {}, new name '{}'", - __func__, oldname, newdfd, newname); + + LOG(DEBUG, "{}() called with oldname: \"{}\", newfd: {}, newname: \"{}\"", + __func__, oldname, newdfd, newname); std::string oldname_resolved; if (CTX->relativize_path(oldname, oldname_resolved)) { - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; } @@ -232,18 +272,21 @@ int hook_symlinkat(const char * oldname, int newdfd, const char * newname) { return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } int hook_access(const char* path, int mask) { - CTX->log()->trace("{}() called path '{}', mask {}", __func__, path, mask); + + LOG(DEBUG, "{}() called path: 
\"{}\", mask: {}", + __func__, path, mask); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { auto ret = adafs_access(rel_path, mask); @@ -256,8 +299,9 @@ int hook_access(const char* path, int mask) { } int hook_faccessat(int dirfd, const char * cpath, int mode) { - CTX->log()->trace("{}() called with path '{}' dirfd {}, mode {}", - __func__, cpath, dirfd, mode); + + LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", mode: {}", + __func__, dirfd, cpath, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -275,13 +319,16 @@ int hook_faccessat(int dirfd, const char * cpath, int mode) { return with_errno(adafs_access(resolved, mode)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } off_t hook_lseek(unsigned int fd, off_t offset, unsigned int whence) { - CTX->log()->trace("{}() called with fd {}, offset {}, whence {}", __func__, fd, offset, whence); + + LOG(DEBUG, "{}() called with fd: {}, offset: {}, whence: {}", + __func__, fd, offset, whence); + if (CTX->file_map()->exist(fd)) { auto off_ret = adafs_lseek(fd, static_cast(offset), whence); if (off_ret > std::numeric_limits::max()) { @@ -289,14 +336,17 @@ off_t hook_lseek(unsigned int fd, off_t offset, unsigned int whence) { } else if(off_ret < 0) { return -errno; } - CTX->log()->trace("{}() returning {}", __func__, off_ret); + LOG(DEBUG, "{}() returning {}", __func__, off_ret); return off_ret; } return syscall_no_intercept(SYS_lseek, fd, offset, whence); } int hook_truncate(const char* path, long length) { - CTX->log()->trace("{}() called with path: {}, offset: {}", __func__, path, length); + + LOG(DEBUG, "{}() called with path: {}, offset: {}", + __func__, path, length); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { return with_errno(adafs_truncate(rel_path, length)); @@ -305,7 +355,10 @@ int hook_truncate(const char* 
path, long length) { } int hook_ftruncate(unsigned int fd, unsigned long length) { - CTX->log()->trace("{}() called [fd: {}, offset: {}]", __func__, fd, length); + + LOG(DEBUG, "{}() called with fd: {}, offset: {}", + __func__, fd, length); + if (CTX->file_map()->exist(fd)) { auto path = CTX->file_map()->get(fd)->path(); return with_errno(adafs_truncate(path, length)); @@ -314,7 +367,10 @@ int hook_ftruncate(unsigned int fd, unsigned long length) { } int hook_dup(unsigned int fd) { - CTX->log()->trace("{}() called with oldfd {}", __func__, fd); + + LOG(DEBUG, "{}() called with oldfd: {}", + __func__, fd); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_dup(fd)); } @@ -322,7 +378,10 @@ int hook_dup(unsigned int fd) { } int hook_dup2(unsigned int oldfd, unsigned int newfd) { - CTX->log()->trace("{}() called with fd {} newfd {}", __func__, oldfd, newfd); + + LOG(DEBUG, "{}() called with oldfd: {}, newfd: {}", + __func__, oldfd, newfd); + if (CTX->file_map()->exist(oldfd)) { return with_errno(adafs_dup2(oldfd, newfd)); } @@ -330,26 +389,47 @@ int hook_dup2(unsigned int oldfd, unsigned int newfd) { } int hook_dup3(unsigned int oldfd, unsigned int newfd, int flags) { + + LOG(DEBUG, "{}() called with oldfd: {}, newfd: {}, flags: {}", + __func__, oldfd, newfd, flags); + if (CTX->file_map()->exist(oldfd)) { // TODO implement O_CLOEXEC flag first which is used with fcntl(2) // It is in glibc since kernel 2.9. 
So maybe not that important :) - CTX->log()->warn("{}() Not supported", __func__); + LOG(WARNING, "{}() Not supported", __func__); return -ENOTSUP; } return syscall_no_intercept(SYS_dup3, oldfd, newfd, flags); } int hook_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, dirp: {}, count: {}", + __func__, fd, fmt::ptr(dirp), count); + if (CTX->file_map()->exist(fd)) { return with_errno(getdents(fd, dirp, count)); } return syscall_no_intercept(SYS_getdents, fd, dirp, count); } + +int hook_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count) { + + LOG(DEBUG, "{}() called with fd: {}, dirp: {}, count: {}", + __func__, fd, fmt::ptr(dirp), count); + + if (CTX->file_map()->exist(fd)) { + return with_errno(getdents64(fd, dirp, count)); + } + return syscall_no_intercept(SYS_getdents64, fd, dirp, count); +} + + int hook_mkdirat(int dirfd, const char * cpath, mode_t mode) { - CTX->log()->trace("{}() called with fd: {}, path: {}, mode: {}", - __func__, dirfd, cpath, mode); + + LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", mode: {}", + __func__, dirfd, cpath, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -367,13 +447,15 @@ int hook_mkdirat(int dirfd, const char * cpath, mode_t mode) { return with_errno(adafs_mk_node(resolved, mode | S_IFDIR)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_fchmodat(int dirfd, const char * cpath, mode_t mode) { - CTX->log()->trace("{}() called dirfd {}, path '{}', mode {}", __func__, dirfd, cpath, mode); + + LOG(DEBUG, "{}() called dirfd: {}, path: \"{}\", mode: {}", + __func__, dirfd, cpath, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -388,37 +470,43 @@ 
int hook_fchmodat(int dirfd, const char * cpath, mode_t mode) { return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_fchmod(unsigned int fd, mode_t mode) { - CTX->log()->trace("{}() called with fd {}, mode {}", __func__, fd, mode); + + LOG(DEBUG, "{}() called with fd: {}, mode: {}", + __func__, fd, mode); + if (CTX->file_map()->exist(fd)) { - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; } return syscall_no_intercept(SYS_fchmod, fd, mode); } int hook_chdir(const char * path) { - CTX->log()->trace("{}() called with path '{}'", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\"", + __func__, path); + std::string rel_path; bool internal = CTX->relativize_path(path, rel_path); if (internal) { //path falls in our namespace auto md = adafs_metadata(rel_path); if (md == nullptr) { - CTX->log()->error("{}() path does not exists", __func__); + LOG(ERROR, "{}() path does not exists", __func__); return -ENOENT; } if(!S_ISDIR(md->mode())) { - CTX->log()->error("{}() path is not a directory", __func__); + LOG(ERROR, "{}() path is not a directory", __func__); return -ENOTDIR; } //TODO get complete path from relativize_path instead of @@ -438,12 +526,15 @@ int hook_chdir(const char * path) { } int hook_fchdir(unsigned int fd) { - CTX->log()->trace("{}() called with fd {}", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}", + __func__, fd); + if (CTX->file_map()->exist(fd)) { auto open_dir = CTX->file_map()->get_dir(fd); if (open_dir == nullptr) { //Cast did not succeeded: open_file is a regular file - CTX->log()->error("{}() file descriptor refers to a normal file: '{}'", + 
LOG(ERROR, "{}() file descriptor refers to a normal file: '{}'", __func__, open_dir->path()); return -EBADF; } @@ -472,9 +563,12 @@ int hook_fchdir(unsigned int fd) { } int hook_getcwd(char * buf, unsigned long size) { - CTX->log()->trace("{}() called with size {}", __func__, size); + + LOG(DEBUG, "{}() called with buf: {}, size: {}", + __func__, fmt::ptr(buf), size); + if(CTX->cwd().size() + 1 > size) { - CTX->log()->error("{}() buffer too small to host current working dir", __func__); + LOG(ERROR, "{}() buffer too small to host current working dir", __func__); return -ERANGE; } @@ -483,8 +577,9 @@ int hook_getcwd(char * buf, unsigned long size) { } int hook_readlinkat(int dirfd, const char * cpath, char * buf, int bufsiz) { - CTX->log()->trace("{}() called with path '{}' dirfd {}, bufsize {}", - __func__, cpath, dirfd, bufsiz); + + LOG(DEBUG, "{}() called with dirfd: {}, path \"{}\", buf: {}, bufsize: {}", + __func__, dirfd, cpath, fmt::ptr(buf), bufsiz); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved, false); @@ -499,17 +594,20 @@ int hook_readlinkat(int dirfd, const char * cpath, char * buf, int bufsiz) { return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() not supported", __func__); + LOG(WARNING, "{}() not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { - CTX->log()->trace("{}() called with fd {}, cmd {}, arg {}", __func__, fd, cmd, arg); + + LOG(DEBUG, "{}() called with fd: {}, cmd: {}, arg: {}", + __func__, fd, cmd, arg); + if (!CTX->file_map()->exist(fd)) { return syscall_no_intercept(SYS_fcntl, fd, cmd, arg); } @@ -517,11 +615,11 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { switch (cmd) { case F_DUPFD: - CTX->log()->trace("{}() F_DUPFD on fd {}", 
__func__, fd); + LOG(DEBUG, "{}() F_DUPFD on fd {}", __func__, fd); return with_errno(adafs_dup(fd)); case F_DUPFD_CLOEXEC: - CTX->log()->trace("{}() F_DUPFD_CLOEXEC on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_DUPFD_CLOEXEC on fd {}", __func__, fd); ret = adafs_dup(fd); if(ret == -1) { return -errno; @@ -530,7 +628,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { return ret; case F_GETFD: - CTX->log()->trace("{}() F_GETFD on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_GETFD on fd {}", __func__, fd); if(CTX->file_map()->get(fd) ->get_flag(OpenFile_flags::cloexec)) { return FD_CLOEXEC; @@ -538,7 +636,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { return 0; case F_GETFL: - CTX->log()->trace("{}() F_GETFL on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_GETFL on fd {}", __func__, fd); ret = 0; if(CTX->file_map()->get(fd) ->get_flag(OpenFile_flags::rdonly)) { @@ -555,7 +653,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { return ret; case F_SETFD: - CTX->log()->trace("{}() [fd: {}, cmd: F_SETFD, FD_CLOEXEC: {}]", + LOG(DEBUG, "{}() [fd: {}, cmd: F_SETFD, FD_CLOEXEC: {}]", __func__, fd, (arg & FD_CLOEXEC)); CTX->file_map()->get(fd) ->set_flag(OpenFile_flags::cloexec, (arg & FD_CLOEXEC)); @@ -563,7 +661,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { default: - CTX->log()->error("{}() unrecognized command {} on fd {}", + LOG(ERROR, "{}() unrecognized command {} on fd {}", __func__, cmd, fd); return -ENOTSUP; } @@ -573,8 +671,9 @@ int hook_renameat(int olddfd, const char * oldname, int newdfd, const char * newname, unsigned int flags) { - CTX->log()->trace("{}() called with olddfd {}, oldname: '{}', newfd {}, newname '{}', flags {}", - __func__, olddfd, oldname, newdfd, newname, flags); + LOG(DEBUG, "{}() called with olddfd: {}, oldname: \"{}\", newfd: {}, " + "newname \"{}\", flags {}", + __func__, olddfd, oldname, newdfd, newname, flags); const char * 
oldpath_pass; std::string oldpath_resolved; @@ -592,11 +691,11 @@ int hook_renameat(int olddfd, const char * oldname, return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() not supported", __func__); + LOG(WARNING, "{}() not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } @@ -616,11 +715,11 @@ int hook_renameat(int olddfd, const char * oldname, return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() not supported", __func__); + LOG(WARNING, "{}() not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } @@ -628,7 +727,10 @@ int hook_renameat(int olddfd, const char * oldname, } int hook_statfs(const char * path, struct statfs * buf) { - CTX->log()->trace("{}() called with path: {}", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", + __func__, path, fmt::ptr(buf)); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { return with_errno(adafs_statfs(buf)); @@ -637,7 +739,10 @@ int hook_statfs(const char * path, struct statfs * buf) { } int hook_fstatfs(unsigned int fd, struct statfs * buf) { - CTX->log()->trace("{}() called with fs: {}", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}", + __func__, fd, fmt::ptr(buf)); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_statfs(buf)); } diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index af8a59ccb68745b263a7906b83d288c0f3a90f69..f4da3893717f0717e90c3c1d90e72b8ba1315c15 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -15,346 +15,899 @@ #include "client/preload.hpp" #include "client/hooks.hpp" -#ifndef NDEBUG -#include "client/syscall_names.hpp" -#endif +#include #include #include #include +#include 
+#include +#include + +#include + +#include + +static thread_local bool reentrance_guard_flag; +static thread_local gkfs::syscall::info saved_syscall_info; + + +static constexpr void +save_current_syscall_info(gkfs::syscall::info info) { + saved_syscall_info = info; +} + +static constexpr void +reset_current_syscall_info() { + saved_syscall_info = gkfs::syscall::no_info; +} + +static inline gkfs::syscall::info +get_current_syscall_info() { + return saved_syscall_info; +} + + +/* + * hook_internal -- interception hook for internal syscalls + * + * This hook is basically used to keep track of file descriptors created + * internally by the library itself. This is important because some + * applications (e.g. ssh) may attempt to close all open file descriptors + * which would leave the library internals in an inconsistent state. + * We forward syscalls to the kernel but we keep track of any syscalls that may + * create or destroy a file descriptor so that we can mark them as 'internal'. + */ +static inline int +hook_internal(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *result) { + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + arg0, arg1, arg2, arg3, arg4, arg5 + }; +#endif + + LOG(SYSCALL, gkfs::syscall::from_internal_code | gkfs::syscall::to_hook | + gkfs::syscall::not_executed, syscall_number, args); + + switch (syscall_number) { + + case SYS_open: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_creat: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + O_WRONLY | O_CREAT | O_TRUNC, + static_cast(arg1)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_openat: + *result = syscall_no_intercept(syscall_number, 
+ static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_epoll_create: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } -#define NOT_HOOKED 1 -#define HOOKED 0 + break; + case SYS_epoll_create1: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); -static inline int hook(long syscall_number, + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_dup: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_dup2: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_dup3: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_inotify_init: + *result = syscall_no_intercept(syscall_number); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_inotify_init1: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + + case SYS_perf_event_open: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + static_cast(arg1), + static_cast(arg2), + static_cast(arg3), + static_cast(arg4)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_signalfd: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1)); + + if(*result >= 0) { + *result = 
CTX->register_internal_fd(*result); + } + break; + + case SYS_signalfd4: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_timerfd_create: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + + case SYS_socket: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_socketpair: + + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + static_cast(arg2), + reinterpret_cast(arg3)); + + if(*result >= 0) { + reinterpret_cast(arg3)[0] = + CTX->register_internal_fd(reinterpret_cast(arg3)[0]); + reinterpret_cast(arg3)[1] = + CTX->register_internal_fd(reinterpret_cast(arg3)[1]); + } + + break; + + case SYS_pipe: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0)); + + if(*result >= 0) { + reinterpret_cast(arg0)[0] = + CTX->register_internal_fd(reinterpret_cast(arg0)[0]); + reinterpret_cast(arg0)[1] = + CTX->register_internal_fd(reinterpret_cast(arg0)[1]); + } + + break; + + case SYS_pipe2: + + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + static_cast(arg1)); + if(*result >= 0) { + reinterpret_cast(arg0)[0] = + CTX->register_internal_fd(reinterpret_cast(arg0)[0]); + reinterpret_cast(arg0)[1] = + CTX->register_internal_fd(reinterpret_cast(arg0)[1]); + } + + break; + + case SYS_eventfd: + + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_eventfd2: + + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + 
static_cast(arg1)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_recvmsg: + { + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + + // The recvmsg() syscall can receive file descriptors from another + // process that the kernel automatically adds to the client's fds + // as if dup2 had been called. Whenever that happens, we need to + // make sure that we register these additional fds as internal, or + // we could inadvertently overwrite them + if(*result >= 0) { + auto* hdr = reinterpret_cast(arg1); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(hdr); + + for(; cmsg != NULL; cmsg = CMSG_NXTHDR(hdr, cmsg)) { + if(cmsg->cmsg_type == SCM_RIGHTS) { + + size_t nfd = cmsg->cmsg_len > CMSG_LEN(0) ? + (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int) : + 0; + + int* fds = + reinterpret_cast(CMSG_DATA(cmsg)); + + for(size_t i = 0; i < nfd; ++i) { + LOG(DEBUG, "recvmsg() provided extra fd {}", fds[i]); + + // ensure we update the fds in cmsg + // if they have been relocated + fds[i] = CTX->register_internal_fd(fds[i]); + } + } + } + } + + break; + } + + case SYS_accept: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_fcntl: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + arg2); + + if(*result >= 0 && + (static_cast(arg1) == F_DUPFD || + static_cast(arg1) == F_DUPFD_CLOEXEC)) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_close: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result == 0) { + CTX->unregister_internal_fd(arg0); + } + break; + + default: + // ignore any other syscalls, i.e.: pass them on to the kernel + // (syscalls forwarded to the kernel that return are logged in + // 
hook_forwarded_syscall()) + ::save_current_syscall_info( + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed); + return gkfs::syscall::forward_to_kernel; + } + + LOG(SYSCALL, gkfs::syscall::from_internal_code | + gkfs::syscall::to_hook | gkfs::syscall::executed, + syscall_number, args, *result); + + return gkfs::syscall::hooked; + +} + +/* + * hook -- interception hook for application syscalls + * + * This hook is used to implement any application filesystem-related syscalls. + */ +static inline +int hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, - long *result) -{ + long *result) { + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + arg0, arg1, arg2, arg3, arg4, arg5 + }; +#endif + + LOG(SYSCALL, gkfs::syscall::from_external_code | + gkfs::syscall::to_hook | gkfs::syscall::not_executed, + syscall_number, args); switch (syscall_number) { - case SYS_open: - *result = hook_openat(AT_FDCWD, - reinterpret_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_creat: - *result = hook_openat(AT_FDCWD, - reinterpret_cast(arg0), - O_WRONLY | O_CREAT | O_TRUNC, - static_cast(arg1)); - break; - - case SYS_openat: - *result = hook_openat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3)); - break; - - case SYS_close: - *result = hook_close(static_cast(arg0)); - break; - - case SYS_stat: - *result = hook_stat(reinterpret_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_lstat: - *result = hook_lstat(reinterpret_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_fstat: - *result = hook_fstat(static_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_newfstatat: - *result = hook_fstatat(static_cast(arg0), - reinterpret_cast(arg1), - reinterpret_cast(arg2), - static_cast(arg3)); - break; - - case SYS_read: - *result = 
hook_read(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_pread64: - *result = hook_pread(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3)); - break; - - case SYS_pwrite64: - *result = hook_pwrite(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3)); - break; - case SYS_write: - *result = hook_write(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_writev: - *result = hook_writev(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_pwritev: - *result = hook_pwritev(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3), - static_cast(arg4)); - break; - - case SYS_unlink: - *result = hook_unlinkat(AT_FDCWD, - reinterpret_cast(arg0), - 0); - break; + case SYS_execve: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2)); + break; + +#ifdef SYS_execveat + case SYS_execveat: + *result = syscall_no_intercept(syscall_number, + arg0, + reinterpret_cast(arg1), + reinterpret_cast(arg2), + reinterpret_cast(arg3), + arg4); + break; +#endif + + case SYS_open: + *result = hook_openat(AT_FDCWD, + reinterpret_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; - case SYS_unlinkat: - *result = hook_unlinkat(static_cast(arg0), + case SYS_creat: + *result = hook_openat(AT_FDCWD, + reinterpret_cast(arg0), + O_WRONLY | O_CREAT | O_TRUNC, + static_cast(arg1)); + break; + + case SYS_openat: + *result = hook_openat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + break; + + case SYS_close: + *result = hook_close(static_cast(arg0)); + break; + + case SYS_stat: + *result = hook_stat(reinterpret_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_lstat: + *result = hook_lstat(reinterpret_cast(arg0), + reinterpret_cast(arg1)); + break; + + case 
SYS_fstat: + *result = hook_fstat(static_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_newfstatat: + *result = hook_fstatat(static_cast(arg0), reinterpret_cast(arg1), + reinterpret_cast(arg2), + static_cast(arg3)); + break; + + case SYS_read: + *result = hook_read(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_pread64: + *result = hook_pread(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + break; + + case SYS_pwrite64: + *result = hook_pwrite(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + break; + case SYS_write: + *result = hook_write(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_writev: + *result = hook_writev(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_pwritev: + *result = hook_pwritev(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3), + static_cast(arg4)); + break; + + case SYS_unlink: + *result = hook_unlinkat(AT_FDCWD, + reinterpret_cast(arg0), + 0); + break; + + case SYS_unlinkat: + *result = hook_unlinkat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_rmdir: + *result = hook_unlinkat(AT_FDCWD, + reinterpret_cast(arg0), + AT_REMOVEDIR); + break; + + case SYS_symlink: + *result = hook_symlinkat(reinterpret_cast(arg0), + AT_FDCWD, + reinterpret_cast(arg1)); + break; + + case SYS_symlinkat: + *result = hook_symlinkat(reinterpret_cast(arg0), + static_cast(arg1), + reinterpret_cast(arg2)); + break; + + case SYS_access: + *result = hook_access(reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_faccessat: + *result = hook_faccessat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_lseek: + *result = hook_lseek(static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; + + case SYS_truncate: + 
*result = hook_truncate(reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_ftruncate: + *result = hook_ftruncate(static_cast(arg0), + static_cast(arg1)); + break; + + case SYS_dup: + *result = hook_dup(static_cast(arg0)); + break; + + case SYS_dup2: + *result = hook_dup2(static_cast(arg0), + static_cast(arg1)); + break; + + case SYS_dup3: + *result = hook_dup3(static_cast(arg0), + static_cast(arg1), static_cast(arg2)); - break; - - case SYS_rmdir: - *result = hook_unlinkat(AT_FDCWD, + break; + + case SYS_getdents: + *result = hook_getdents(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_getdents64: + *result = hook_getdents64(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_mkdirat: + *result = hook_mkdirat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_mkdir: + *result = hook_mkdirat(AT_FDCWD, reinterpret_cast(arg0), - AT_REMOVEDIR); - break; - - case SYS_symlink: - *result = hook_symlinkat(reinterpret_cast(arg0), - AT_FDCWD, - reinterpret_cast(arg1)); - break; - - case SYS_symlinkat: - *result = hook_symlinkat(reinterpret_cast(arg0), - static_cast(arg1), - reinterpret_cast(arg2)); - break; - - case SYS_access: - *result = hook_access(reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_faccessat: - *result = hook_faccessat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_lseek: - *result = hook_lseek(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_truncate: - *result = hook_truncate(reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_ftruncate: - *result = hook_ftruncate(static_cast(arg0), - static_cast(arg1)); - break; - - case SYS_dup: - *result = hook_dup(static_cast(arg0)); - break; - - case SYS_dup2: - *result = hook_dup2(static_cast(arg0), - static_cast(arg1)); - break; - - case SYS_dup3: - *result = 
hook_dup3(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_getdents: - *result = hook_getdents(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_mkdirat: - *result = hook_mkdirat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_mkdir: - *result = hook_mkdirat(AT_FDCWD, - reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_chmod: - *result = hook_fchmodat(AT_FDCWD, - reinterpret_cast(arg0), static_cast(arg1)); - break; + break; + + case SYS_chmod: + *result = hook_fchmodat(AT_FDCWD, + reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_fchmod: + *result = hook_fchmod(static_cast(arg0), + static_cast(arg1)); + break; + + case SYS_fchmodat: + *result = hook_fchmodat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_chdir: + *result = hook_chdir(reinterpret_cast(arg0)); + break; + + case SYS_fchdir: + *result = hook_fchdir(static_cast(arg0)); + break; + + case SYS_getcwd: + *result = hook_getcwd(reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_readlink: + *result = hook_readlinkat(AT_FDCWD, + reinterpret_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_readlinkat: + *result = hook_readlinkat(static_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2), + static_cast(arg3)); + break; + + case SYS_fcntl: + *result = hook_fcntl(static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; + + case SYS_rename: + *result = hook_renameat(AT_FDCWD, + reinterpret_cast(arg0), + AT_FDCWD, + reinterpret_cast(arg1), + 0); + break; + + case SYS_renameat: + *result = hook_renameat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + reinterpret_cast(arg3), + 0); + break; + + case SYS_renameat2: + *result = hook_renameat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + reinterpret_cast(arg3), + 
static_cast(arg4)); + break; + + case SYS_fstatfs: + *result = hook_fstatfs(static_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_statfs: + *result = hook_statfs(reinterpret_cast(arg0), + reinterpret_cast(arg1)); + break; + + default: + // ignore any other syscalls, i.e.: pass them on to the kernel + // (syscalls forwarded to the kernel that return are logged in + // hook_forwarded_syscall()) + ::save_current_syscall_info( + gkfs::syscall::from_external_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed); + return gkfs::syscall::forward_to_kernel; + } - case SYS_fchmod: - *result = hook_fchmod(static_cast(arg0), - static_cast(arg1)); - break; + LOG(SYSCALL, gkfs::syscall::from_external_code | + gkfs::syscall::to_hook | gkfs::syscall::executed, + syscall_number, args, *result); - case SYS_fchmodat: - *result = hook_fchmodat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_chdir: - *result = hook_chdir(reinterpret_cast(arg0)); - break; - - case SYS_fchdir: - *result = hook_fchdir(static_cast(arg0)); - break; - - case SYS_getcwd: - *result = hook_getcwd(reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_readlink: - *result = hook_readlinkat(AT_FDCWD, - reinterpret_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_readlinkat: - *result = hook_readlinkat(static_cast(arg0), - reinterpret_cast(arg1), - reinterpret_cast(arg2), - static_cast(arg3)); - break; - - case SYS_fcntl: - *result = hook_fcntl(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_rename: - *result = hook_renameat(AT_FDCWD, - reinterpret_cast(arg0), - AT_FDCWD, - reinterpret_cast(arg1), - 0); - break; + return gkfs::syscall::hooked; +} - case SYS_renameat: - *result = hook_renameat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - reinterpret_cast(arg3), - 0); - break; +static void +hook_forwarded_syscall(long syscall_number, + long arg0, 
long arg1, long arg2, + long arg3, long arg4, long arg5, + long result) +{ - case SYS_renameat2: - *result = hook_renameat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - reinterpret_cast(arg3), - static_cast(arg4)); - break; - - case SYS_fstatfs: - *result = hook_fstatfs(static_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_statfs: - *result = hook_statfs(reinterpret_cast(arg0), - reinterpret_cast(arg1)); - break; - - default: - /* - * Ignore any other syscalls - * i.e.: pass them on to the kernel - * as would normally happen. - */ - - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Passthrough", syscall_names[syscall_number], syscall_number); - #endif - return NOT_HOOKED; + if(::get_current_syscall_info() == gkfs::syscall::no_info) { + return; } - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Intercepted", syscall_names[syscall_number], syscall_number); - #endif - return HOOKED; +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + arg0, arg1, arg2, arg3, arg4, arg5 + }; +#endif + + LOG(SYSCALL, + ::get_current_syscall_info() | + gkfs::syscall::executed, + syscall_number, args, result); + + ::reset_current_syscall_info(); +} + +static void +hook_clone_at_child(unsigned long flags, + void* child_stack, + int* ptid, + int* ctid, + long newtls) { + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + static_cast(flags), + reinterpret_cast(child_stack), + reinterpret_cast(ptid), + reinterpret_cast(ctid), + static_cast(newtls), + 0}; +#endif + + reentrance_guard_flag = true; + + LOG(SYSCALL, + ::get_current_syscall_info() | + gkfs::syscall::executed, + SYS_clone, args, 0); + + reentrance_guard_flag = false; } +static void +hook_clone_at_parent(unsigned long flags, + void* child_stack, + int* ptid, + int* ctid, + long newtls, + long returned_pid) { + +#if !defined(GKFS_DISABLE_LOGGING) && 
defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + static_cast(flags), + reinterpret_cast(child_stack), + reinterpret_cast(ptid), + reinterpret_cast(ctid), + static_cast(newtls), + 0}; +#endif + + reentrance_guard_flag = true; + + LOG(SYSCALL, + ::get_current_syscall_info() | + gkfs::syscall::executed, + SYS_clone, args, returned_pid); + + reentrance_guard_flag = false; +} -static __thread bool guard_flag; +int +internal_hook_guard_wrapper(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *syscall_return_value) { + assert(CTX->interception_enabled()); + + + if (reentrance_guard_flag) { + ::save_current_syscall_info( + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed); + return gkfs::syscall::forward_to_kernel; + } + + int was_hooked = 0; + + reentrance_guard_flag = true; + int oerrno = errno; + was_hooked = hook_internal(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); + errno = oerrno; + reentrance_guard_flag = false; + + return was_hooked; +} + + +/* + * hook_guard_wrapper -- a wrapper which can notice reentrance. + * + * The reentrance_guard_flag flag allows the library to distinguish the hooking + * of its own syscalls. E.g. while handling an open() syscall, + * libgkfs_intercept might call fopen(), which in turn uses an open() + * syscall internally. This internally used open() syscall is once again + * forwarded to libgkfs_intercept, but using this flag we can notice this + * case of reentering itself. + * + * XXX This approach still contains a very significant bug, as libgkfs_intercept + * being called inside a signal handler might easily forward a mock fd to the + * kernel. 
+ */ int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, - long *syscall_return_value) -{ + long *syscall_return_value) { + assert(CTX->interception_enabled()); - if (guard_flag) { - return NOT_HOOKED; - } + int was_hooked = 0; - int is_hooked; + if (reentrance_guard_flag) { + int oerrno = errno; + was_hooked = hook_internal(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); + errno = oerrno; + return was_hooked; + } - guard_flag = true; + reentrance_guard_flag = true; int oerrno = errno; - is_hooked = hook(syscall_number, - arg0, arg1, arg2, arg3, arg4, arg5, - syscall_return_value); + was_hooked = hook(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); errno = oerrno; - guard_flag = false; + reentrance_guard_flag = false; - return is_hooked; + return was_hooked; } +void start_self_interception() { + + LOG(DEBUG, "Enabling syscall interception for self"); + + intercept_hook_point = internal_hook_guard_wrapper; + intercept_hook_point_post_kernel = hook_forwarded_syscall; + intercept_hook_point_clone_child = hook_clone_at_child; + intercept_hook_point_clone_parent = hook_clone_at_parent; +} void start_interception() { + assert(CTX->interception_enabled()); -#ifndef NDEBUG - CTX->log()->debug("Activating interception of syscalls"); -#endif + + LOG(DEBUG, "Enabling syscall interception for client process"); + // Set up the callback function pointer intercept_hook_point = hook_guard_wrapper; + intercept_hook_point_post_kernel = hook_forwarded_syscall; + intercept_hook_point_clone_child = hook_clone_at_child; + intercept_hook_point_clone_parent = hook_clone_at_parent; } void stop_interception() { assert(CTX->interception_enabled()); + + LOG(DEBUG, "Disabling syscall interception for client process"); + // Reset callback function pointer intercept_hook_point = nullptr; + intercept_hook_point_post_kernel = nullptr; + intercept_hook_point_clone_child = nullptr; 
+ intercept_hook_point_clone_parent = nullptr; } diff --git a/src/client/logging.cpp b/src/client/logging.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19a2fe5945622f46562ebfe1b8d697c015ca42ba --- /dev/null +++ b/src/client/logging.cpp @@ -0,0 +1,358 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include +#include +#include +#include + +namespace gkfs { +namespace log { + +struct opt_info { + const char name_[32]; + const std::size_t length_; + const char help_text_[8][64]; + const log_level mask_; +}; + +#define STR_AND_LEN(strbuf) \ + strbuf, sizeof(strbuf) - 1 + +static const auto constexpr debug_opts = utils::make_array( + + opt_info{STR_AND_LEN("none"), + {"don't print any messages"}, + log::none}, + +#ifdef GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("syscalls"), + {"Trace system calls: print the name of each system call,", + "its arguments, and its return value. All system calls are", + "printed after being executed save for those that may not", + "return, such as execve() and execve_at()", + "[ default: off ]"}, + log::syscall}, + + opt_info{STR_AND_LEN("syscalls_at_entry"), + {"Trace system calls: print the name of each system call", + "and its arguments. 
All system calls are printed before ", + "being executed and therefore their return values are not", + "available in the log", + "[ default: off ]"}, + log::syscall_at_entry}, + +#endif // !GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("info"), + {"Print information messages", + "[ default: on ]"}, + log::info}, + + opt_info{STR_AND_LEN("critical"), + {"Print critical errors", + "[ default: on ]"}, + log::critical}, + + opt_info{STR_AND_LEN("errors"), + {"Print errors", + "[ default: on ]"}, + log::error}, + + opt_info{STR_AND_LEN("warnings"), + {"Print warnings", + "[ default: on ]"}, + log::warning}, + + opt_info{STR_AND_LEN("mercury"), + {"Print Mercury messages", + "[ default: on ]"}, + log::mercury}, + +#ifdef GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("debug"), + {"Print debug messages", + "[ default: off ]"}, + log::debug}, + + opt_info{STR_AND_LEN("most"), + {"All previous options except 'syscalls_at_entry' combined."}, + log::most }, + +#endif // !GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("all"), + {"All previous options combined."}, + log::all }, + + opt_info{STR_AND_LEN("help"), + {"Print this help message and exit."}, + log::help} +); + +static const auto constexpr max_debug_opt_length = + sizeof("syscalls_at_entry") - 1; + +static const auto constexpr max_help_text_rows = + sizeof(debug_opts[0].help_text_) / sizeof(debug_opts[0].help_text_[0]); + +/** + * process_log_options -- process the string given as parameter to determine + * which debugging options are enabled and return a + * log_level describing them + */ +log_level +process_log_options(const std::string gkfs_debug) { + +#ifdef GKFS_DISABLE_LOGGING + + (void) gkfs_debug; + logger::log_message(stdout, "warning: logging options ignored: " + "logging support was disabled in this build"); + return log::none; + +#endif // ! 
GKFS_DISABLE_LOGGING + + log_level dm = log::none; + + std::vector tokens; + + // skip separating white spaces and commas + boost::split(tokens, gkfs_debug, + [](char c) { return c == ' ' || c == ','; }); + + for(const auto& t : tokens) { + + bool is_known = false; + + for(const auto& opt : debug_opts) { + + // none disables any future and previous flags observed + if(t == "none") { + return log::none; + } + + if(t == opt.name_) { + dm |= opt.mask_; + is_known = true; + break; + } + } + + if(!is_known) { + logger::log_message(stdout, "warning: logging option '{}' unknown; " + "try {}=help", t, gkfs::env::LOG); + } + } + + if(!!(dm & log::help)) { + logger::log_message(stdout, "Valid options for the {} " + "environment variable are:\n", gkfs::env::LOG); + + + for(const auto& opt : debug_opts) { + const auto padding = max_debug_opt_length - opt.length_ + 2; + + logger::log_message(stdout, " {}{:>{}}{}", opt.name_, "", + padding, opt.help_text_[0]); + + for(auto i = 1lu; i < max_help_text_rows; ++i) { + if(opt.help_text_[i][0] != 0) { + logger::log_message(stdout, " {:>{}}{}", "", + max_debug_opt_length + 2, + opt.help_text_[i]); + } + } + + logger::log_message(stdout, ""); + } + + logger::log_message(stdout, "\n" + "To direct the logging output into a file " + "instead of standard output\n" + "a filename can be specified using the " + "{} environment variable.", gkfs::env::LOG_OUTPUT); + ::_exit(0); + } + + return dm; +} + +#ifdef GKFS_DEBUG_BUILD +std::bitset<512> +process_log_filter(const std::string& log_filter) { + + std::bitset<512> filtered_syscalls; + std::vector tokens; + + if(log_filter.empty()) { + return filtered_syscalls; + } + + // skip separating white spaces and commas + boost::split(tokens, log_filter, + [](char c) { return c == ' ' || c == ','; }); + + for(const auto& t : tokens) { + const auto sc = syscall::lookup_by_name(t); + + if(std::strcmp(sc.name(), "unknown_syscall") == 0) { + logger::log_message(stdout, "warning: system call '{}' unknown; " 
+ "will not filter", t); + continue; + } + + filtered_syscalls.set(sc.number()); + } + + return filtered_syscalls; +} +#endif // GKFS_DEBUG_BUILD + +logger::logger(const std::string& opts, + const std::string& path, + bool trunc, +#ifdef GKFS_DEBUG_BUILD + const std::string& filter +#endif + ) : + timezone_(date::current_zone()) { + + /* use stderr by default */ + log_fd_ = 2; + log_mask_ = process_log_options(opts); + +#ifdef GKFS_DEBUG_BUILD + filtered_syscalls_ = process_log_filter(filter); +#endif + + if(!path.empty()) { + int flags = O_CREAT | O_RDWR | O_APPEND | O_TRUNC; + + if(!trunc) { + flags &= ~O_TRUNC; + } + + // we use ::open() here rather than ::syscall_no_intercept(SYS_open) + // because we want the call to be intercepted by our hooks, which + // allows us to categorize the resulting fd as 'internal' and + // relocate it to our private range + int fd = ::open(path.c_str(), flags, 0600); + + if(fd == -1) { + log(gkfs::log::error, __func__, __LINE__, "Failed to open log " + "file '{}'. Logging will fall back to stderr", path); + return; + } + + log_fd_ = fd; + } + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + // Finding the current timezone implies accessing OS files (i.e. syscalls), + // but current_zone() doesn't actually retrieve the time zone but rather + // provides a descriptor to it that is **atomically initialized** upon its + // first use. Thus, if we don't force the initialization here, logging + // the first intercepted syscall will produce a call to + // date::time_zone::init() (under std::call_once) which internally ends up + // calling fopen(). Since fopen() ends up calling sys_open(), we will need + // to generate another timestamp for a system call log entry, which will + // attempt to call date::time_zone::init() since the prior initialization + // (under the same std::call_once) has not yet completed. 
+ // + // Unfortunately, date::time_zone doesn't provide a function to prevent + // this lazy initialization, therefore we force it by requesting + // information from an arbitrary timepoint (January 1st 1970) which forces + // the initialization. This doesn't do any actual work and could safely + // be removed if the date API ends up providing this functionality. + using namespace date; + timezone_->get_info(date::sys_days{January/1/1970}); +#endif +} + +logger::~logger() { + log_fd_ = ::syscall_no_intercept(SYS_close, log_fd_); +} + +void +logger::log_syscall(syscall::info info, + const long syscall_number, + const long args[6], + boost::optional result) { + + + const bool log_syscall_entry = !!(log::syscall_at_entry & log_mask_); + const bool log_syscall_result = !!(log::syscall & log_mask_); + + // log the syscall if and only if logging for syscalls is enabled + if(!log_syscall_entry && !log_syscall_result) { + return; + } + +#ifdef GKFS_DEBUG_BUILD + if(filtered_syscalls_[syscall_number]) { + return; + } +#endif + + // log the syscall even if we don't have information on it, since it may + // be important to the user (we assume that the syscall has completed + // though) + if(info == syscall::no_info) { + goto print_syscall; + } + + // log the syscall entry if the syscall may not return (e.g. execve) or + // if we are sure that it won't ever return (e.g. 
exit), even if + // log::syscall_at_entry is disabled + if(syscall::may_not_return(syscall_number) || + syscall::never_returns(syscall_number)) { + goto print_syscall; + } + + if(log_syscall_entry && syscall::execution_is_pending(info)) { + goto print_syscall; + } + + if(log_syscall_result && !syscall::execution_is_pending(info)) { + goto print_syscall; + } + + return; + +print_syscall: + + static_buffer buffer; + + detail::format_timestamp_to(buffer, timezone_); + detail::format_syscall_info_to(buffer, info); + + if(result) { + syscall::decode(buffer, syscall_number, args, *result); + } + else { + syscall::decode(buffer, syscall_number, args); + } + + fmt::format_to(buffer, "\n"); + + ::syscall_no_intercept(SYS_write, log_fd_, buffer.data(), buffer.size()); +} + +} // namespace log +} // namespace gkfs + diff --git a/src/client/open_file_map.cpp b/src/client/open_file_map.cpp index 5a13e8b2806ecd03367257350580dc8eccaecd67..3f2678a1865ea72294daf72c515c4c84e198ba34 100644 --- a/src/client/open_file_map.cpp +++ b/src/client/open_file_map.cpp @@ -17,6 +17,7 @@ #include #include #include +#include using namespace std; @@ -143,7 +144,7 @@ bool OpenFileMap::remove(const int fd) { files_.erase(fd); if (fd_validation_needed && files_.empty()) { fd_validation_needed = false; - CTX->log()->info("{}() fd_validation flag reset", __func__); + LOG(DEBUG, "fd_validation flag reset"); } return true; } @@ -188,7 +189,7 @@ int OpenFileMap::generate_fd_idx() { // We need a mutex here for thread safety std::lock_guard inode_lock(fd_idx_mutex); if (fd_idx == std::numeric_limits::max()) { - CTX->log()->info("{}() File descriptor index exceeded ints max value. Setting it back to 100000", __func__); + LOG(WARNING, "File descriptor index exceeded ints max value. Setting it back to 100000"); /* * Setting fd_idx back to 3 could have the effect that fd are given twice for different path. * This must not happen. 
Instead a flag is set which tells can tell the OpenFileMap that it should check diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 6b63dd95fee305233b8f937bcc515ece4f8e769d..9503ee31c007dec75a3856b3ce97e3f1539f26fc 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -11,7 +11,6 @@ SPDX-License-Identifier: MIT */ -#include #include #include #include @@ -20,15 +19,23 @@ #include #include #include "global/rpc/rpc_types.hpp" +#include #include #include #include +#include +#include + +#include +#include +#include #include + using namespace std; -// -// thread to initialize the whole margo shazaam only once per process + +// make sure that things are only initialized once static pthread_once_t init_env_thread = PTHREAD_ONCE_INIT; // RPC IDs @@ -46,120 +53,105 @@ hg_id_t rpc_read_data_id; hg_id_t rpc_trunc_data_id; hg_id_t rpc_get_dirents_id; hg_id_t rpc_chunk_stat_id; -// Margo instances -margo_instance_id ld_margo_rpc_id; +std::unique_ptr ld_network_service; static inline void exit_error_msg(int errcode, const string& msg) { - CTX->log()->error(msg); - cerr << "GekkoFS error: " << msg << endl; - exit(errcode); + + LOG_ERROR("{}", msg); + gkfs::log::logger::log_message(stderr, "{}\n", msg); + + // if we don't disable interception before calling ::exit() + // syscall hooks may find an inconsistent in shared state + // (e.g. 
the logger) and thus, crash + stop_interception(); + CTX->disable_interception(); + ::exit(errcode); } -/** - * Registers a margo instance with all used RPC - * Note that the r(pc tags are redundant for rpc - * @param mid - * @param mode - */ -void register_client_rpcs(margo_instance_id mid) { - - rpc_config_id = MARGO_REGISTER(mid, - hg_tag::fs_config, - void, - rpc_config_out_t, - NULL); - - rpc_mk_node_id = MARGO_REGISTER(mid, hg_tag::create, rpc_mk_node_in_t, rpc_err_out_t, NULL); - rpc_stat_id = MARGO_REGISTER(mid, hg_tag::stat, rpc_path_only_in_t, rpc_stat_out_t, NULL); - rpc_rm_node_id = MARGO_REGISTER(mid, hg_tag::remove, rpc_rm_node_in_t, - rpc_err_out_t, NULL); - - rpc_decr_size_id = MARGO_REGISTER(mid, - hg_tag::decr_size, - rpc_trunc_in_t, - rpc_err_out_t, - NULL); - - rpc_update_metadentry_id = MARGO_REGISTER(mid, hg_tag::update_metadentry, rpc_update_metadentry_in_t, - rpc_err_out_t, NULL); - rpc_get_metadentry_size_id = MARGO_REGISTER(mid, hg_tag::get_metadentry_size, rpc_path_only_in_t, - rpc_get_metadentry_size_out_t, NULL); - rpc_update_metadentry_size_id = MARGO_REGISTER(mid, hg_tag::update_metadentry_size, - rpc_update_metadentry_size_in_t, - rpc_update_metadentry_size_out_t, - NULL); +int +hg_log_function(FILE *stream, const char *fmt, ...) 
{ -#ifdef HAS_SYMLINKS - rpc_mk_symlink_id = MARGO_REGISTER(mid, - hg_tag::mk_symlink, - rpc_mk_symlink_in_t, - rpc_err_out_t, - NULL); -#endif +#ifdef GKFS_DISABLE_LOGGING + (void) stream; + (void) fmt; + + return 0; +#endif // GKFS_DISABLE_LOGGING + + va_list ap; + ::va_start(ap, fmt); + int n = gkfs::log::get_global_logger()->log(gkfs::log::mercury, fmt, ap); + ::va_end(ap); - rpc_write_data_id = MARGO_REGISTER(mid, hg_tag::write_data, rpc_write_data_in_t, rpc_data_out_t, - NULL); - rpc_read_data_id = MARGO_REGISTER(mid, hg_tag::read_data, rpc_read_data_in_t, rpc_data_out_t, - NULL); - - rpc_trunc_data_id = MARGO_REGISTER(mid, - hg_tag::trunc_data, - rpc_trunc_in_t, - rpc_err_out_t, - NULL); - - rpc_get_dirents_id = MARGO_REGISTER(mid, hg_tag::get_dirents, rpc_get_dirents_in_t, rpc_get_dirents_out_t, - NULL); - - rpc_chunk_stat_id = MARGO_REGISTER(mid, - hg_tag::chunk_stat, - rpc_chunk_stat_in_t, - rpc_chunk_stat_out_t, - NULL); + return n; } /** - * Initializes the Margo client for a given na_plugin - * @param mode - * @param na_plugin - * @return + * Initializes the Hermes client for a given transport prefix + * @param transport_prefix + * @return true if succesfully initialized; false otherwise */ -bool init_margo_client(const std::string& na_plugin) { - // IMPORTANT: this struct needs to be zeroed before use - struct hg_init_info hg_options = {}; +bool init_hermes_client(const std::string& transport_prefix) { + + try { + + hermes::engine_options opts{}; + #if USE_SHM - hg_options.auto_sm = HG_TRUE; -#else - hg_options.auto_sm = HG_FALSE; + opts |= hermes::use_auto_sm; #endif - hg_options.stats = HG_FALSE; - hg_options.na_class = nullptr; - - ld_margo_rpc_id = margo_init_opt(na_plugin.c_str(), - MARGO_CLIENT_MODE, - &hg_options, - HG_FALSE, - 1); - if (ld_margo_rpc_id == MARGO_INSTANCE_NULL) { - CTX->log()->error("{}() margo_init_pool failed to initialize the Margo client", __func__); + + ld_network_service = + std::make_unique( + 
hermes::get_transport_type(transport_prefix), opts); + + ld_network_service->set_mercury_log_function(::hg_log_function); + + ld_network_service->run(); + } catch (const std::exception& ex) { + fmt::print(stderr, "Failed to initialize Hermes RPC client {}\n", + ex.what()); return false; } - register_client_rpcs(ld_margo_rpc_id); + + rpc_config_id = gkfs::rpc::fs_config::public_id; + rpc_mk_node_id = gkfs::rpc::create::public_id; + rpc_stat_id = gkfs::rpc::stat::public_id; + rpc_rm_node_id = gkfs::rpc::remove::public_id; + rpc_decr_size_id = gkfs::rpc::decr_size::public_id; + rpc_update_metadentry_id = gkfs::rpc::update_metadentry::public_id; + rpc_get_metadentry_size_id = gkfs::rpc::get_metadentry_size::public_id; + rpc_update_metadentry_size_id = gkfs::rpc::update_metadentry::public_id; + +#ifdef HAS_SYMLINKS + rpc_mk_symlink_id = gkfs::rpc::mk_symlink::public_id; +#endif // HAS_SYMLINKS + + rpc_write_data_id = gkfs::rpc::write_data::public_id; + rpc_read_data_id = gkfs::rpc::read_data::public_id; + rpc_trunc_data_id = gkfs::rpc::trunc_data::public_id; + rpc_get_dirents_id = gkfs::rpc::get_dirents::public_id; + rpc_chunk_stat_id = gkfs::rpc::chunk_stat::public_id; + return true; } /** - * This function is only called in the preload constructor and initializes Argobots and Margo clients + * This function is only called in the preload constructor and initializes + * the file system client */ void init_ld_environment_() { - //use rpc_addresses here to avoid "static initialization order problem" - if (!init_margo_client(RPC_PROTOCOL)) { - exit_error_msg(EXIT_FAILURE, "Unable to initializa Margo RPC client"); + // initialize Hermes interface to Mercury + LOG(INFO, "Initializing RPC subsystem..."); + + if (!init_hermes_client(RPC_PROTOCOL)) { + exit_error_msg(EXIT_FAILURE, "Unable to initialize RPC subsystem"); } try { + LOG(INFO, "Loading peer addresses..."); load_hosts(); } catch (const std::exception& e) { exit_error_msg(EXIT_FAILURE, "Failed to load hosts addresses: 
"s + e.what()); @@ -169,51 +161,32 @@ void init_ld_environment_() { auto simple_hash_dist = std::make_shared(CTX->local_host_id(), CTX->hosts().size()); CTX->distributor(simple_hash_dist); + LOG(INFO, "Retrieving file system configuration..."); + if (!rpc_send::get_fs_config()) { exit_error_msg(EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC."); } - CTX->log()->info("{}() Environment initialization successful.", __func__); + LOG(INFO, "Environment initialization successful."); } void init_ld_env_if_needed() { pthread_once(&init_env_thread, init_ld_environment_); } -void init_logging() { - std::string path; - try { - path = gkfs::get_env_own("PRELOAD_LOG_PATH"); - } catch (const std::exception& e) { - path = DEFAULT_PRELOAD_LOG_PATH; - } - - spdlog::level::level_enum level; - try { - level = get_spdlog_level(gkfs::get_env_own("LOG_LEVEL")); - } catch (const std::exception& e) { - level = get_spdlog_level(DEFAULT_DAEMON_LOG_LEVEL); - } - - auto logger_names = std::vector {"main"}; - - setup_loggers(logger_names, level, path); - - CTX->log(spdlog::get(logger_names.at(0))); -} - void log_prog_name() { std::string line; std::ifstream cmdline("/proc/self/cmdline"); if (!cmdline.is_open()) { - CTX->log()->error("Unable to open cmdline file"); + LOG(ERROR, "Unable to open cmdline file"); throw std::runtime_error("Unable to open cmdline file"); } if(!getline(cmdline, line)) { throw std::runtime_error("Unable to read cmdline file"); } std::replace(line.begin(), line.end(), '\0', ' '); - CTX->log()->info("Command to itercept: '{}'", line); + line.erase(line.length() - 1, line.length()); + LOG(INFO, "Process cmdline: '{}'", line); cmdline.close(); } @@ -221,14 +194,33 @@ void log_prog_name() { * Called initially ONCE when preload library is used with the LD_PRELOAD environment variable */ void init_preload() { - init_logging(); - CTX->log()->debug("Initialized logging subsystem"); + + CTX->enable_interception(); + 
start_self_interception(); + + CTX->init_logging(); + // from here ownwards it is safe to print messages + LOG(DEBUG, "Logging subsystem initialized"); + + // Kernel modules such as ib_uverbs may create fds in kernel space and pass + // them to user-space processes using ioctl()-like interfaces. if this + // happens during our internal initialization, there's no way for us to + // control this creation and the fd will be created in the + // [0, MAX_USER_FDS) range rather than in our private + // [MAX_USER_FDS, MAX_OPEN_FDS) range. To prevent this for our internal + // initialization code, we forcefully occupy the user fd range to force + // such modules to create fds in our private range. + CTX->protect_user_fds(); + log_prog_name(); init_cwd(); - CTX->log()->debug("Current working directory: '{}'", CTX->cwd()); + + LOG(DEBUG, "Current working directory: '{}'", CTX->cwd()); init_ld_env_if_needed(); CTX->enable_interception(); - CTX->log()->debug("{}() exit", __func__); + + CTX->unprotect_user_fds(); + start_interception(); } @@ -236,16 +228,16 @@ void init_preload() { * Called last when preload library is used with the LD_PRELOAD environment variable */ void destroy_preload() { + + CTX->clear_hosts(); + LOG(DEBUG, "Peer information deleted"); + + ld_network_service.reset(); + LOG(DEBUG, "RPC subsystem shut down"); + stop_interception(); CTX->disable_interception(); - if (ld_margo_rpc_id == nullptr) { - CTX->log()->debug("{}() No services in preload library used. Nothing to shut down.", __func__); - return; - } - cleanup_addresses(); - CTX->log()->debug("{}() About to finalize the margo RPC client", __func__); - // XXX Sometimes this hangs on the cluster. Investigate. - margo_finalize(ld_margo_rpc_id); - CTX->log()->debug("{}() Shut down Margo RPC client successful", __func__); - CTX->log()->info("All services shut down. Client shutdown complete."); + LOG(DEBUG, "Syscall interception stopped"); + + LOG(INFO, "All subsystems shut down. 
Client shutdown complete."); } diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 79e7ac7a05314d76a35f5c975573580d1bd61386..485a3b212b734cd8cddf974a1047f5d0f18295aa 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -11,26 +11,57 @@ SPDX-License-Identifier: MIT */ +#include #include +#include +#include + +#include +#include +#include #include #include #include + #include #include +#include +decltype(PreloadContext::MIN_INTERNAL_FD) constexpr +PreloadContext::MIN_INTERNAL_FD; +decltype(PreloadContext::MAX_USER_FDS) constexpr +PreloadContext::MAX_USER_FDS; PreloadContext::PreloadContext(): ofm_(std::make_shared()), - fs_conf_(std::make_shared()) -{} + fs_conf_(std::make_shared()) { -void PreloadContext::log(std::shared_ptr logger) { - log_ = logger; + internal_fds_.set(); + internal_fds_must_relocate_ = true; } -std::shared_ptr PreloadContext::log() const { - return log_; +void +PreloadContext::init_logging() { + + const std::string log_opts = + gkfs::env::get_var(gkfs::env::LOG, DEFAULT_CLIENT_LOG_LEVEL); + + const std::string log_output = + gkfs::env::get_var(gkfs::env::LOG_OUTPUT, DEFAULT_CLIENT_LOG_PATH); + +#ifdef GKFS_DEBUG_BUILD + const std::string log_filter = + gkfs::env::get_var(gkfs::env::LOG_SYSCALL_FILTER, ""); +#endif + + const std::string trunc_val = + gkfs::env::get_var(gkfs::env::LOG_OUTPUT_TRUNC); + + const bool log_trunc = !(!trunc_val.empty() && trunc_val[0] == '0'); + + gkfs::log::create_global_logger(log_opts, log_output, + log_trunc, log_filter); } void PreloadContext::mountdir(const std::string& path) { @@ -49,7 +80,6 @@ const std::vector& PreloadContext::mountdir_components() const { } void PreloadContext::cwd(const std::string& path) { - log_->debug("Setting CWD to '{}'", path); cwd_ = path; } @@ -57,12 +87,16 @@ const std::string& PreloadContext::cwd() const { return cwd_; } -const std::vector& PreloadContext::hosts() const { +const std::vector& PreloadContext::hosts() const { 
return hosts_; } -void PreloadContext::hosts(const std::vector& addrs) { - hosts_ = addrs; +void PreloadContext::hosts(const std::vector& endpoints) { + hosts_ = endpoints; +} + +void PreloadContext::clear_hosts() { + hosts_.clear(); } uint64_t PreloadContext::local_host_id() const { @@ -167,3 +201,146 @@ bool PreloadContext::interception_enabled() const { return interception_enabled_; } +int PreloadContext::register_internal_fd(int fd) { + + assert(fd >= 0); + + if(!internal_fds_must_relocate_) { + LOG(DEBUG, "registering fd {} as internal (no relocation needed)", fd); + assert(fd >= MIN_INTERNAL_FD); + internal_fds_.reset(fd - MIN_INTERNAL_FD); + return fd; + } + + LOG(DEBUG, "registering fd {} as internal (needs relocation)", fd); + + std::lock_guard lock(internal_fds_mutex_); + const int pos = internal_fds_._Find_first(); + + if(static_cast(pos) == internal_fds_.size()) { + throw std::runtime_error( +"Internal GekkoFS file descriptors exhausted, increase MAX_INTERNAL_FDS in " +"CMake, rebuild GekkoFS and try again."); + } + internal_fds_.reset(pos); + + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + long args[gkfs::syscall::MAX_ARGS]{fd, pos + MIN_INTERNAL_FD, O_CLOEXEC}; +#endif + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed, + SYS_dup3, args); + + const int ifd = + ::syscall_no_intercept(SYS_dup3, fd, pos + MIN_INTERNAL_FD, O_CLOEXEC); + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::executed, + SYS_dup3, args, ifd); + + assert(::syscall_error_code(ifd) == 0); + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + long args2[gkfs::syscall::MAX_ARGS]{fd}; +#endif + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed, + SYS_close, args2); + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + int rv = ::syscall_no_intercept(SYS_close, 
fd); +#else + ::syscall_no_intercept(SYS_close, fd); +#endif + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::executed, + SYS_close, args2, rv); + + LOG(DEBUG, " (fd {} relocated to ifd {})", fd, ifd); + + return ifd; +} + +void PreloadContext::unregister_internal_fd(int fd) { + + LOG(DEBUG, "unregistering internal fd {}", fd); + + assert(fd >= MIN_INTERNAL_FD); + + const auto pos = fd - MIN_INTERNAL_FD; + + std::lock_guard lock(internal_fds_mutex_); + internal_fds_.set(pos); +} + +bool PreloadContext::is_internal_fd(int fd) const { + + if(fd < MIN_INTERNAL_FD) { + return false; + } + + const auto pos = fd - MIN_INTERNAL_FD; + + std::lock_guard lock(internal_fds_mutex_); + return !internal_fds_.test(pos); +} + +void +PreloadContext::protect_user_fds() { + + LOG(DEBUG, "Protecting application fds [{}, {}]", 0, MAX_USER_FDS - 1); + + const int nullfd = ::syscall_no_intercept(SYS_open, "/dev/null", O_RDONLY); + assert(::syscall_error_code(nullfd) == 0); + protected_fds_.set(nullfd); + + const auto fd_is_open = [](int fd) -> bool { + const int ret = ::syscall_no_intercept(SYS_fcntl, fd, F_GETFD); + return ::syscall_error_code(ret) == 0 || + ::syscall_error_code(ret) != EBADF; + }; + + for(int fd = 0; fd < MAX_USER_FDS; ++fd) { + if(fd_is_open(fd)) { + LOG(DEBUG, " fd {} was already in use, skipping", fd); + continue; + } + + const int ret = ::syscall_no_intercept(SYS_dup3, nullfd, fd, O_CLOEXEC); + assert(::syscall_error_code(ret) == 0); + protected_fds_.set(fd); + } + + internal_fds_must_relocate_ = false; +} + +void +PreloadContext::unprotect_user_fds() { + + for(std::size_t fd = 0; fd < protected_fds_.size(); ++fd) { + if(!protected_fds_[fd]) { + continue; + } + + const int ret = + ::syscall_error_code(::syscall_no_intercept(SYS_close, fd)); + + if(ret != 0) { + LOG(ERROR, "Failed to unprotect fd") + } + } + + internal_fds_must_relocate_ = true; +} diff --git a/src/client/preload_util.cpp 
b/src/client/preload_util.cpp index 02d7c51511fb5e768e13174e320d7ddc9710d243..32dd3ede3af5c82312959c5746bd3ea96cc37b0b 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -12,9 +12,12 @@ */ #include +#include +#include #include #include #include +#include #include #include @@ -76,7 +79,9 @@ int metadata_to_stat(const std::string& path, const Metadata& md, struct stat& a } vector> load_hosts_file(const std::string& lfpath) { - CTX->log()->debug("{}() Loading hosts file: '{}'", __func__, lfpath); + + LOG(DEBUG, "Loading hosts file: \"{}\"", lfpath); + ifstream lf(lfpath); if (!lf) { throw runtime_error(fmt::format("Failed to open hosts file '{}': {}", @@ -91,8 +96,10 @@ vector> load_hosts_file(const std::string& lfpath) { std::smatch match; while (getline(lf, line)) { if (!regex_match(line, match, line_re)) { - spdlog::error("{}() Unrecognized line format: [path: '{}', line: '{}']", - __func__, lfpath, line); + + LOG(ERROR, "Unrecognized line format: [path: '{}', line: '{}']", + lfpath, line); + throw runtime_error( fmt::format("unrecognized line format: '{}'", line)); } @@ -103,38 +110,41 @@ vector> load_hosts_file(const std::string& lfpath) { return hosts; } -hg_addr_t margo_addr_lookup_retry(const std::string& uri) { - CTX->log()->debug("{}() Lookink up address '{}'", __func__, uri); - // try to look up 3 times before erroring out - hg_return_t ret; - hg_addr_t remote_addr = HG_ADDR_NULL; - ::random_device rd; // obtain a random number from hardware - unsigned int attempts = 0; +hermes::endpoint lookup_endpoint(const std::string& uri, + std::size_t max_retries = 3) { + + LOG(DEBUG, "Looking up address \"{}\"", uri); + + std::random_device rd; // obtain a random number from hardware + std::size_t attempts = 0; + std::string error_msg; + do { - ret = margo_addr_lookup(ld_margo_rpc_id, uri.c_str(), &remote_addr); - if (ret == HG_SUCCESS) { - return remote_addr; + try { + return ld_network_service->lookup(uri); + } catch (const exception& ex) 
{ + error_msg = ex.what(); + + LOG(WARNING, "Failed to lookup address '{}'. Attempts [{}/{}]", + uri, attempts + 1, max_retries); + + // Wait a random amount of time and try again + std::mt19937 g(rd()); // seed the random generator + std::uniform_int_distribution<> distr(50, 50 * (attempts + 2)); // define the range + std::this_thread::sleep_for(std::chrono::milliseconds(distr(g))); + continue; } - CTX->log()->warn("{}() Failed to lookup address '{}'. Attempts [{}/3]", __func__, uri, attempts + 1); - // Wait a random amount of time and try again - ::mt19937 g(rd()); // seed the random generator - ::uniform_int_distribution<> distr(50, 50 * (attempts + 2)); // define the range - ::this_thread::sleep_for(std::chrono::milliseconds(distr(g))); - } while (++attempts < 3); - throw runtime_error( - fmt::format("Failed to lookup address '{}', error: {}", uri, HG_Error_to_string(ret))); + } while (++attempts < max_retries); + + throw std::runtime_error( + fmt::format("Endpoint for address '{}' could not be found ({})", + uri, error_msg)); } void load_hosts() { string hosts_file; - try { - hosts_file = gkfs::get_env_own("HOSTS_FILE"); - } catch (const exception& e) { - CTX->log()->info("{}() Failed to get hosts file path" - " from environment, using default: '{}'", - __func__, DEFAULT_HOSTS_FILE); - hosts_file = DEFAULT_HOSTS_FILE; - } + + hosts_file = gkfs::env::get_var(gkfs::env::HOSTS_FILE, DEFAULT_HOSTS_FILE); vector> hosts; try { @@ -148,11 +158,13 @@ void load_hosts() { throw runtime_error(fmt::format("Host file empty: '{}'", hosts_file)); } - CTX->log()->info("{}() Hosts pool size: {}", __func__, hosts.size()); + LOG(INFO, "Hosts pool size: {}", hosts.size()); auto local_hostname = get_my_hostname(true); bool local_host_found = false; - vector addrs(hosts.size()); + + std::vector addrs; + addrs.resize(hosts.size()); vector host_ids(hosts.size()); // populate vector with [0, ..., host_size - 1] @@ -169,51 +181,24 @@ void load_hosts() { // lookup addresses and put 
abstract server addresses into rpc_addressesre for (const auto& id: host_ids) { - const auto& hostname = hosts.at(id).first; - const auto& uri = hosts.at(id).second; - auto addr = margo_addr_lookup_retry(uri); - addrs.at(id) = addr; + const auto& hostname = hosts.at(id).first; + const auto& uri = hosts.at(id).second; + + addrs[id] = ::lookup_endpoint(uri); if (!local_host_found && hostname == local_hostname) { - CTX->log()->debug("{}() Found local host: {}", __func__, hostname); + LOG(DEBUG, "Found local host: {}", hostname); CTX->local_host_id(id); local_host_found = true; } + + LOG(DEBUG, "Found peer: {}", addrs[id].to_string()); } if (!local_host_found) { - CTX->log()->warn("{}() Failed to find local host." - "Fallback: use host id '0' as local host", __func__); + LOG(WARNING, "Failed to find local host. Using host '0' as local host"); CTX->local_host_id(0); } - CTX->hosts(addrs); -} - -void cleanup_addresses() { - for (auto& addr: CTX->hosts()) { - margo_addr_free(ld_margo_rpc_id, addr); - } -} - - -hg_return -margo_create_wrap_helper(const hg_id_t rpc_id, uint64_t recipient, hg_handle_t& handle) { - auto ret = margo_create(ld_margo_rpc_id, CTX->hosts().at(recipient), rpc_id, &handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() creating handle FAILED", __func__); - return HG_OTHER_ERROR; - } - return ret; -} - -/** - * Wraps certain margo functions to create a Mercury handle - * @param path - * @param handle - * @return - */ -hg_return margo_create_wrap(const hg_id_t rpc_id, const std::string& path, hg_handle_t& handle) { - auto recipient = CTX->distributor()->locate_file_metadata(path); - return margo_create_wrap_helper(rpc_id, recipient, handle); + CTX->hosts(addrs); } diff --git a/src/client/resolve.cpp b/src/client/resolve.cpp index 673d7322c66a72f4447c53d24dc4a4cd8b6b378b..b1e88df46ca2584ab175960eaa49f1f5dee2d116 100644 --- a/src/client/resolve.cpp +++ b/src/client/resolve.cpp @@ -21,10 +21,10 @@ #include "global/path_util.hpp" #include 
"global/configure.hpp" #include "client/preload.hpp" +#include "client/logging.hpp" +#include "client/env.hpp" -constexpr static const char * ENV_NAME_CWD = ENV_PREFIX "CWD"; - /* Match components in path * * Returns the number of consecutive components at start of `path` @@ -79,7 +79,9 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c * and false otherwise. */ bool resolve_path (const std::string& path, std::string& resolved, bool resolve_last_link) { - CTX->log()->debug("{}() path: '{}'", __func__, path); + + LOG(DEBUG, "path: \"{}\", resolved: \"{}\", resolve_last_link: {}", + path, resolved, resolve_last_link); struct stat st; const std::vector& mnt_components = CTX->mountdir_components(); @@ -149,9 +151,9 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c ++matched_components; } if (lstat(resolved.c_str(), &st) < 0) { -#ifndef NDEBUG - CTX->log()->debug("{}() path does not exists: '{}'", __func__, resolved.c_str()); -#endif + + LOG(DEBUG, "path \"{}\" does not exist", resolved); + resolved.append(path, end, std::string::npos); return false; } @@ -161,7 +163,10 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c } auto link_resolved = std::unique_ptr(new char[PATH_MAX]); if (realpath(resolved.c_str(), link_resolved.get()) == nullptr) { - CTX->log()->error("{}() Failed to get realpath for link '{}'. Error: {}", __func__, resolved, strerror(errno)); + + LOG(ERROR, "Failed to get realpath for link \"{}\". 
" + "Error: {}", resolved, ::strerror(errno)); + resolved.append(path, end, std::string::npos); return false; } @@ -184,14 +189,14 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c if (matched_components >= mnt_components.size()) { resolved.erase(1, CTX->mountdir().size()); - CTX->log()->debug("{}() internal: '{}'", __func__, resolved); + LOG(DEBUG, "internal: \"{}\"", resolved); return true; } if (resolved.size() == 0) { resolved.push_back(PSP); } - CTX->log()->debug("{}() external: '{}'", __func__, resolved); + LOG(DEBUG, "external: \"{}\"", resolved); return false; } @@ -211,10 +216,12 @@ std::string get_sys_cwd() { } void set_sys_cwd(const std::string& path) { - CTX->log()->debug("{}() to '{}'", __func__, path); + + LOG(DEBUG, "Changing working directory to \"{}\"", path); + if (long ret = syscall_no_intercept(SYS_chdir, path.c_str())) { - CTX->log()->error("{}() failed to set system current working directory: {}", - __func__, std::strerror(syscall_error_code(ret))); + LOG(ERROR, "Failed to change working directory: {}", + std::strerror(syscall_error_code(ret))); throw std::system_error(syscall_error_code(ret), std::system_category(), "Failed to set system current working directory"); @@ -222,10 +229,12 @@ void set_sys_cwd(const std::string& path) { } void set_env_cwd(const std::string& path) { - CTX->log()->debug("{}() to '{}'", __func__, path); - if(setenv(ENV_NAME_CWD, path.c_str(), 1)) { - CTX->log()->error("{}() failed to set environment current working directory: {}", - __func__, std::strerror(errno)); + + LOG(DEBUG, "Setting {} to \"{}\"", gkfs::env::CWD, path); + + if(setenv(gkfs::env::CWD, path.c_str(), 1)) { + LOG(ERROR, "Failed while setting {}: {}", + gkfs::env::CWD, std::strerror(errno)); throw std::system_error(errno, std::system_category(), "Failed to set environment current working directory"); @@ -233,10 +242,14 @@ void set_env_cwd(const std::string& path) { } void unset_env_cwd() { - CTX->log()->debug("{}()", 
__func__); - if(unsetenv(ENV_NAME_CWD)) { - CTX->log()->error("{}() failed to unset environment current working directory: {}", - __func__, std::strerror(errno)); + + LOG(DEBUG, "Clearing {}()", gkfs::env::CWD); + + if(unsetenv(gkfs::env::CWD)) { + + LOG(ERROR, "Failed to clear {}: {}", + gkfs::env::CWD, std::strerror(errno)); + throw std::system_error(errno, std::system_category(), "Failed to unset environment current working directory"); @@ -244,7 +257,7 @@ void unset_env_cwd() { } void init_cwd() { - const char* env_cwd = std::getenv(ENV_NAME_CWD); + const char* env_cwd = std::getenv(gkfs::env::CWD); if (env_cwd != nullptr) { CTX->cwd(env_cwd); } else { diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6e0264671060ceb62baf962463641e4173581f5c --- /dev/null +++ b/src/client/rpc/hg_rpcs.cpp @@ -0,0 +1,45 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#include +#include + +namespace hermes { namespace detail { + +//============================================================================== +// register request types so that they can be used by users and the engine +// +void +register_user_request_types() { + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + +#ifdef HAS_SYMLINKS + (void) registered_requests().add(); +#endif // HAS_SYMLINKS + + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); + +} + +}} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index 5b13c7dd0a4031870da71bedcdb92c21f3849b6e..c080150d58d7ca4de2e3950c76b07775db32fbc4 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -17,10 +17,11 @@ #include "global/rpc/rpc_types.hpp" #include #include +#include +#include #include - namespace rpc_send { @@ -32,369 +33,445 @@ using namespace std; /** * Sends an RPC request to a specific node to pull all chunks that belong to him */ -ssize_t write(const string& path, const void* buf, const bool append_flag, const off64_t in_offset, - const size_t write_size, const int64_t updated_metadentry_size) { +ssize_t write(const string& path, const void* buf, const bool append_flag, + const off64_t in_offset, const size_t write_size, + const int64_t updated_metadentry_size) { + assert(write_size > 0); - // Calculate chunkid boundaries and numbers so that daemons know in which interval to look for chunks - off64_t offset = in_offset; - if (append_flag) - offset = updated_metadentry_size - 
write_size; + + // Calculate chunkid boundaries and numbers so that daemons know in + // which interval to look for chunks + off64_t offset = append_flag ? + in_offset : + (updated_metadentry_size - write_size); auto chnk_start = chnk_id_for_offset(offset, CHUNKSIZE); auto chnk_end = chnk_id_for_offset((offset + write_size) - 1, CHUNKSIZE); - // Collect all chunk ids within count that have the same destination so that those are send in one rpc bulk transfer - map> target_chnks{}; - // contains the target ids, used to access the target_chnks map. First idx is chunk with potential offset - vector targets{}; + // Collect all chunk ids within count that have the same destination so + // that those are send in one rpc bulk transfer + std::map> target_chnks{}; + // contains the target ids, used to access the target_chnks map. + // First idx is chunk with potential offset + std::vector targets{}; + // targets for the first and last chunk as they need special treatment uint64_t chnk_start_target = 0; uint64_t chnk_end_target = 0; + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { auto target = CTX->distributor()->locate_data(path, chnk_id); + if (target_chnks.count(target) == 0) { - target_chnks.insert(make_pair(target, vector{chnk_id})); + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); targets.push_back(target); - } else + } else { target_chnks[target].push_back(chnk_id); + } + // set first and last chnk targets - if (chnk_id == chnk_start) + if (chnk_id == chnk_start) { chnk_start_target = target; - if (chnk_id == chnk_end) + } + + if (chnk_id == chnk_end) { chnk_end_target = target; + } } + // some helper variables for async RPC - auto target_n = targets.size(); - vector rpc_handles(target_n); - vector rpc_waiters(target_n); - vector rpc_in(target_n); - // register local target buffer for bulk access for margo instance - auto bulk_buf = const_cast(buf); - hg_bulk_t rpc_bulk_handle = nullptr; - auto size = 
make_shared(write_size); - auto ret = margo_bulk_create(ld_margo_rpc_id, 1, &bulk_buf, size.get(), HG_BULK_READ_ONLY, &rpc_bulk_handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to create rpc bulk handle", __func__); + std::vector bufseq{ + hermes::mutable_buffer{const_cast(buf), write_size}, + }; + + // expose user buffers so that they can serve as RDMA data sources + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = + ld_network_service->expose(bufseq, hermes::access_mode::read_only); + + } catch (const std::exception& ex) { + LOG(ERROR, "Failed to expose buffers for RMA"); errno = EBUSY; return -1; } + std::vector> handles; + // Issue non-blocking RPC requests and wait for the result later - for (uint64_t i = 0; i < target_n; i++) { - auto target = targets[i]; - auto total_chunk_size = target_chnks[target].size() * CHUNKSIZE; // total chunk_size for target - if (target == chnk_start_target) // receiver of first chunk must subtract the offset from first chunk + // + // TODO(amiranda): This could be simplified by adding a vector of inputs + // to async_engine::broadcast(). 
This would allow us to avoid manually + // looping over handles as we do below + for(const auto& target : targets) { + + // total chunk_size for target + auto total_chunk_size = target_chnks[target].size() * CHUNKSIZE; + + // receiver of first chunk must subtract the offset from first chunk + if (target == chnk_start_target) { total_chunk_size -= chnk_lpad(offset, CHUNKSIZE); - if (target == chnk_end_target) // receiver of last chunk must subtract + } + + // receiver of last chunk must subtract + if (target == chnk_end_target) { total_chunk_size -= chnk_rpad(offset + write_size, CHUNKSIZE); - // Fill RPC input - rpc_in[i].path = path.c_str(); - rpc_in[i].host_id = target; - rpc_in[i].host_size = CTX->hosts().size(); - rpc_in[i].offset = chnk_lpad(offset, CHUNKSIZE);// first offset in targets is the chunk with a potential offset - rpc_in[i].chunk_n = target_chnks[target].size(); // number of chunks handled by that destination - rpc_in[i].chunk_start = chnk_start; // chunk start id of this write - rpc_in[i].chunk_end = chnk_end; // chunk end id of this write - rpc_in[i].total_chunk_size = total_chunk_size; // total size to write - rpc_in[i].bulk_handle = rpc_bulk_handle; - margo_create_wrap_helper(rpc_write_data_id, target, rpc_handles[i]); - // Send RPC - CTX->log()->trace("{}() host: {}, path: {}, chunks: {}, size: {}, offset: {}", __func__, - target, path, rpc_in[i].chunk_n, total_chunk_size, rpc_in[i].offset); - ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking rpc for path {} and recipient {}", __func__, path, - target); + } + + auto endp = CTX->hosts().at(target); + + try { + + LOG(DEBUG, "Sending RPC ..."); + + gkfs::rpc::write_data::input in( + path, + // first offset in targets is the chunk with + // a potential offset + chnk_lpad(offset, CHUNKSIZE), + target, + CTX->hosts().size(), + // number of chunks handled by that destination + target_chnks[target].size(), 
+ // chunk start id of this write + chnk_start, + // chunk end id of this write + chnk_end, + // total size to write + total_chunk_size, + local_buffers); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + LOG(DEBUG, "host: {}, path: \"{}\", chunks: {}, size: {}, offset: {}", + target, path, in.chunk_n(), total_chunk_size, in.offset()); + + } catch(const std::exception& ex) { + LOG(ERROR, "Unable to send non-blocking rpc for " + "path \"{}\" [peer: {}]", path, target); errno = EBUSY; - for (uint64_t j = 0; j < i + 1; j++) { - margo_destroy(rpc_handles[j]); - } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); return -1; } } - // Wait for RPC responses and then get response and add it to out_size which is the written size - // All potential outputs are served to free resources regardless of errors, although an errorcode is set. - ssize_t out_size = 0; + // Wait for RPC responses and then get response and add it to out_size + // which is the written size All potential outputs are served to free + // resources regardless of errors, although an errorcode is set. bool error = false; - for (unsigned int i = 0; i < target_n; i++) { - // XXX We might need a timeout here to not wait forever for an output that never comes? 
- ret = margo_wait(rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to wait for margo_request handle for path {} recipient {}", __func__, path, - targets[i]); - error = true; - errno = EBUSY; - } - // decode response - rpc_data_out_t out{}; - ret = margo_get_output(rpc_handles[i], &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to get rpc output for path {} recipient {}", __func__, path, targets[i]); + ssize_t out_size = 0; + std::size_t idx = 0; + + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + LOG(ERROR, "Daemon reported error: {}", out.err()); + error = true; + errno = out.err(); + } + + out_size += static_cast(out.io_size()); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", + path, targets[idx]); error = true; errno = EIO; } - if (out.err != 0) { - CTX->log()->error("{}() Daemon reported error: {}", __func__, out.err); - error = true; - errno = out.err; - } - out_size += static_cast(out.io_size); - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); + + ++idx; } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); - return (error) ? -1 : out_size; + + return error ? 
-1 : out_size; } /** * Sends an RPC request to a specific node to push all chunks that belong to him */ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t read_size) { - // Calculate chunkid boundaries and numbers so that daemons know in which interval to look for chunks - auto chnk_start = chnk_id_for_offset(offset, CHUNKSIZE); // first chunk number + + // Calculate chunkid boundaries and numbers so that daemons know in which + // interval to look for chunks + auto chnk_start = chnk_id_for_offset(offset, CHUNKSIZE); auto chnk_end = chnk_id_for_offset((offset + read_size - 1), CHUNKSIZE); - // Collect all chunk ids within count that have the same destination so that those are send in one rpc bulk transfer - map> target_chnks{}; - // contains the recipient ids, used to access the target_chnks map. First idx is chunk with potential offset - vector targets{}; + // Collect all chunk ids within count that have the same destination so + // that those are send in one rpc bulk transfer + std::map> target_chnks{}; + // contains the recipient ids, used to access the target_chnks map. 
+ // First idx is chunk with potential offset + std::vector targets{}; + // targets for the first and last chunk as they need special treatment uint64_t chnk_start_target = 0; uint64_t chnk_end_target = 0; + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { auto target = CTX->distributor()->locate_data(path, chnk_id); + if (target_chnks.count(target) == 0) { - target_chnks.insert(make_pair(target, vector{chnk_id})); + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); targets.push_back(target); - } else + } else { target_chnks[target].push_back(chnk_id); + } + // set first and last chnk targets - if (chnk_id == chnk_start) + if (chnk_id == chnk_start) { chnk_start_target = target; - if (chnk_id == chnk_end) + } + + if (chnk_id == chnk_end) { chnk_end_target = target; + } } - // some helper variables for async RPC - auto target_n = targets.size(); - vector rpc_handles(target_n); - vector rpc_waiters(target_n); - vector rpc_in(target_n); - // register local target buffer for bulk access for margo instance - auto bulk_buf = buf; - hg_bulk_t rpc_bulk_handle = nullptr; - auto size = make_shared(read_size); - auto ret = margo_bulk_create(ld_margo_rpc_id, 1, &bulk_buf, size.get(), HG_BULK_WRITE_ONLY, &rpc_bulk_handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to create rpc bulk handle", __func__); + + // some helper variables for async RPCs + std::vector bufseq{ + hermes::mutable_buffer{buf, read_size}, + }; + + // expose user buffers so that they can serve as RDMA data targets + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = + ld_network_service->expose(bufseq, hermes::access_mode::write_only); + + } catch (const std::exception& ex) { + LOG(ERROR, "Failed to expose buffers for RMA"); errno = EBUSY; return -1; } + + std::vector> handles; + // Issue non-blocking RPC requests and wait for the result later - for (unsigned int i 
= 0; i < target_n; i++) { - auto target = targets[i]; + // + // TODO(amiranda): This could be simplified by adding a vector of inputs + // to async_engine::broadcast(). This would allow us to avoid manually + // looping over handles as we do below + for(const auto& target : targets) { + + // total chunk_size for target auto total_chunk_size = target_chnks[target].size() * CHUNKSIZE; - if (target == chnk_start_target) // receiver of first chunk must subtract the offset from first chunk + + // receiver of first chunk must subtract the offset from first chunk + if (target == chnk_start_target) { total_chunk_size -= chnk_lpad(offset, CHUNKSIZE); - if (target == chnk_end_target) // receiver of last chunk must subtract + } + + // receiver of last chunk must subtract + if (target == chnk_end_target) { total_chunk_size -= chnk_rpad(offset + read_size, CHUNKSIZE); + } - // Fill RPC input - rpc_in[i].path = path.c_str(); - rpc_in[i].host_id = target; - rpc_in[i].host_size = CTX->hosts().size(); - rpc_in[i].offset = chnk_lpad(offset, CHUNKSIZE);// first offset in targets is the chunk with a potential offset - rpc_in[i].chunk_n = target_chnks[target].size(); // number of chunks handled by that destination - rpc_in[i].chunk_start = chnk_start; // chunk start id of this write - rpc_in[i].chunk_end = chnk_end; // chunk end id of this write - rpc_in[i].total_chunk_size = total_chunk_size; // total size to write - rpc_in[i].bulk_handle = rpc_bulk_handle; - margo_create_wrap_helper(rpc_read_data_id, target, rpc_handles[i]); - // Send RPC - ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking rpc for path {} and recipient {}", __func__, path, - target); + auto endp = CTX->hosts().at(target); + + try { + + LOG(DEBUG, "Sending RPC ..."); + + gkfs::rpc::read_data::input in( + path, + // first offset in targets is the chunk with + // a potential offset + chnk_lpad(offset, CHUNKSIZE), + target, 
+ CTX->hosts().size(), + // number of chunks handled by that destination + target_chnks[target].size(), + // chunk start id of this write + chnk_start, + // chunk end id of this write + chnk_end, + // total size to write + total_chunk_size, + local_buffers); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + LOG(DEBUG, "host: {}, path: {}, chunks: {}, size: {}, offset: {}", + target, path, in.chunk_n(), total_chunk_size, in.offset()); + + } catch(const std::exception& ex) { + LOG(ERROR, "Unable to send non-blocking rpc for path \"{}\" " + "[peer: {}]", path, target); errno = EBUSY; - for (uint64_t j = 0; j < i + 1; j++) { - margo_destroy(rpc_handles[j]); - } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); return -1; } } - // Wait for RPC responses and then get response and add it to out_size which is the read size - // All potential outputs are served to free resources regardless of errors, although an errorcode is set. - ssize_t out_size = 0; + // Wait for RPC responses and then get response and add it to out_size + // which is the read size. All potential outputs are served to free + // resources regardless of errors, although an errorcode is set. bool error = false; - for (unsigned int i = 0; i < target_n; i++) { - // XXX We might need a timeout here to not wait forever for an output that never comes? 
- ret = margo_wait(rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to wait for margo_request handle for path {} recipient {}", __func__, path, - targets[i]); - error = true; - errno = EBUSY; - } - // decode response - rpc_data_out_t out{}; - ret = margo_get_output(rpc_handles[i], &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to get rpc output for path {} recipient {}", __func__, path, targets[i]); + ssize_t out_size = 0; + std::size_t idx = 0; + + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + LOG(ERROR, "Daemon reported error: {}", out.err()); + error = true; + errno = out.err(); + } + + out_size += static_cast(out.io_size()); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", + path, targets[idx]); error = true; errno = EIO; } - if (out.err != 0) { - CTX->log()->error("{}() Daemon reported error: {}", __func__, out.err); - error = true; - errno = out.err; - } - out_size += static_cast(out.io_size); - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); + + ++idx; } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); - return (error) ? -1 : out_size; + + return error ? 
-1 : out_size; } int trunc_data(const std::string& path, size_t current_size, size_t new_size) { - assert(current_size > new_size); - hg_return_t ret; - rpc_trunc_in_t in; - in.path = path.c_str(); - in.length = new_size; + assert(current_size > new_size); bool error = false; - // Find out which data server needs to delete chunks in order to contact only them + // Find out which data servers need to delete data chunks in order to + // contact only them const unsigned int chunk_start = chnk_id_for_offset(new_size, CHUNKSIZE); - const unsigned int chunk_end = chnk_id_for_offset(current_size - new_size - 1, CHUNKSIZE); + const unsigned int chunk_end = + chnk_id_for_offset(current_size - new_size - 1, CHUNKSIZE); + std::unordered_set hosts; for(unsigned int chunk_id = chunk_start; chunk_id <= chunk_end; ++chunk_id) { hosts.insert(CTX->distributor()->locate_data(path, chunk_id)); } - std::vector rpc_handles(hosts.size()); - std::vector rpc_waiters(hosts.size()); - unsigned int req_num = 0; + std::vector> handles; + for (const auto& host: hosts) { - ret = margo_create_wrap_helper(rpc_trunc_data_id, host, rpc_handles[req_num]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to create Mercury handle for host: ", __func__, host); - break; - } - // send async rpc - ret = margo_iforward(rpc_handles[req_num], &in, &rpc_waiters[req_num]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to send request to host: {}", __func__, host); - break; - } - ++req_num; - } + auto endp = CTX->hosts().at(host); + + try { + LOG(DEBUG, "Sending RPC ..."); - if(req_num < hosts.size()) { - // An error occurred. Cleanup and return - CTX->log()->error("{}() Error -> sent only some requests {}/{}. 
Cancelling request...", __func__, req_num, hosts.size()); - for(unsigned int i = 0; i < req_num; ++i) { - margo_destroy(rpc_handles[i]); + gkfs::rpc::trunc_data::input in(path, new_size); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + LOG(ERROR, "Failed to send request to host: {}", host); + errno = EIO; + return -1; } - errno = EIO; - return -1; + } - assert(req_num == hosts.size()); // Wait for RPC responses and then get response - rpc_err_out_t out{}; - for (unsigned int i = 0; i < hosts.size(); ++i) { - ret = margo_wait(rpc_waiters[i]); - if (ret == HG_SUCCESS) { - ret = margo_get_output(rpc_handles[i], &out); - if (ret == HG_SUCCESS) { - if(out.err){ - CTX->log()->error("{}() received error response: {}", __func__, out.err); - error = true; - } - } else { - // Get output failed - CTX->log()->error("{}() while getting rpc output", __func__); + for(const auto& h : handles) { + + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? 
+ auto out = h.get().at(0); + + if(out.err() != 0) { + LOG(ERROR, "received error response: {}", out.err()); error = true; + errno = EIO; } - } else { - // Wait failed - CTX->log()->error("{}() Failed while waiting for response", __func__); + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); error = true; + errno = EIO; } - - /* clean up resources consumed by this rpc */ - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); } - if(error) { - errno = EIO; - return -1; - } - return 0; + return error ? -1 : 0; } ChunkStat chunk_stat() { - CTX->log()->trace("{}()", __func__); - rpc_chunk_stat_in_t in; - auto const host_size = CTX->hosts().size(); - std::vector rpc_handles(host_size); - std::vector rpc_waiters(host_size); + std::vector> handles; - hg_return_t hg_ret; + for (const auto& endp : CTX->hosts()) { + try { + LOG(DEBUG, "Sending RPC to host: {}", endp.to_string()); - for (unsigned int target_host = 0; target_host < host_size; ++target_host) { - //Setup rpc input parameters for each host - hg_ret = margo_create_wrap_helper(rpc_chunk_stat_id, target_host, - rpc_handles[target_host]); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error("Failed to create margo handle"); - } - // Send RPC - CTX->log()->trace("{}() Sending RPC to host: {}", __func__, target_host); - hg_ret = margo_iforward(rpc_handles[target_host], - &in, - &rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking chunk_stat to recipient {}", __func__, target_host); - for (unsigned int i = 0; i <= target_host; i++) { - margo_destroy(rpc_handles[i]); - } + gkfs::rpc::chunk_stat::input in(0); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + LOG(ERROR, "Failed to send request to host: {}", endp.to_string()); throw std::runtime_error("Failed to forward non-blocking rpc request"); } } + unsigned long chunk_size = CHUNKSIZE; unsigned long chunk_total = 0; unsigned long chunk_free = 0; - for (unsigned int target_host = 0; target_host < host_size; ++target_host) { - hg_ret = margo_wait(rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed while waiting for rpc completion. target host: {}", target_host)); - } - rpc_chunk_stat_out_t out{}; - hg_ret = margo_get_output(rpc_handles[target_host], &out); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed to get rpc output for target host: {}", target_host)); - } + // wait for RPC responses + for(std::size_t i = 0; i < handles.size(); ++i) { - assert(out.chunk_size == chunk_size); - chunk_total += out.chunk_total; - chunk_free += out.chunk_free; + gkfs::rpc::chunk_stat::output out; - margo_free_output(rpc_handles[target_host], &out); - margo_destroy(rpc_handles[target_host]); + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? 
+ out = handles[i].get().at(0); + + assert(out.chunk_size() == chunk_size); + chunk_total += out.chunk_total(); + chunk_free += out.chunk_free(); + + } catch(const std::exception& ex) { + throw std::runtime_error( + fmt::format("Failed to get rpc output for target host: {}]", i)); + } } return {chunk_size, chunk_total, chunk_free}; diff --git a/src/client/rpc/ld_rpc_management.cpp b/src/client/rpc/ld_rpc_management.cpp index 08fa1031ef62c9c345e4eb57a08d3203e8803e7d..6e52aaca6bdc7045ee658dbdd0471685f0123774 100644 --- a/src/client/rpc/ld_rpc_management.cpp +++ b/src/client/rpc/ld_rpc_management.cpp @@ -13,10 +13,13 @@ #include "client/rpc/ld_rpc_management.hpp" #include "global/rpc/rpc_types.hpp" +#include #include #include // see https://github.com/boostorg/tokenizer/issues/9 #include #include +#include +#include namespace rpc_send { @@ -27,56 +30,39 @@ namespace rpc_send { * @return */ bool get_fs_config() { - hg_handle_t handle; - rpc_config_out_t out{}; - // fill in - auto ret = margo_create(ld_margo_rpc_id, CTX->hosts().at(CTX->local_host_id()), rpc_config_id, &handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() creating handle for failed", __func__); - return false; - } - CTX->log()->debug("{}() Forwarding request", __func__); - for (int i = 0; i < RPC_TRIES; ++i) { - ret = margo_forward_timed(handle, nullptr, RPC_TIMEOUT); - if (ret == HG_SUCCESS) { - break; - } - CTX->log()->warn("{}() Failed to forward request. Error: {}. Attempt {}/{}", __func__, HG_Error_to_string(ret), i+1, RPC_TRIES); - } - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to forward request. 
Giving up after {} attempts", __func__, RPC_TRIES); - margo_destroy(handle); - return false; - } - /* decode response */ - CTX->log()->debug("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Retrieving fs configurations from daemon", __func__); - margo_destroy(handle); + auto endp = CTX->hosts().at(CTX->local_host_id()); + gkfs::rpc::fs_config::output out; + + try { + LOG(DEBUG, "Retrieving file system configurations from daemon"); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can retry + // for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + out = ld_network_service->post(endp).get().at(0); + } catch (const std::exception& ex) { + LOG(ERROR, "Retrieving fs configurations from daemon"); return false; } - CTX->mountdir(out.mountdir); - CTX->log()->info("Mountdir: '{}'", CTX->mountdir()); + CTX->mountdir(out.mountdir()); + LOG(INFO, "Mountdir: '{}'", CTX->mountdir()); - CTX->fs_conf()->rootdir = out.rootdir; - CTX->fs_conf()->atime_state = out.atime_state; - CTX->fs_conf()->mtime_state = out.mtime_state; - CTX->fs_conf()->ctime_state = out.ctime_state; - CTX->fs_conf()->link_cnt_state = out.link_cnt_state; - CTX->fs_conf()->blocks_state = out.blocks_state; - CTX->fs_conf()->uid = out.uid; - CTX->fs_conf()->gid = out.gid; + CTX->fs_conf()->rootdir = out.rootdir(); + CTX->fs_conf()->atime_state = out.atime_state(); + CTX->fs_conf()->mtime_state = out.mtime_state(); + CTX->fs_conf()->ctime_state = out.ctime_state(); + CTX->fs_conf()->link_cnt_state = out.link_cnt_state(); + CTX->fs_conf()->blocks_state = out.blocks_state(); + CTX->fs_conf()->uid = out.uid(); + CTX->fs_conf()->gid = out.gid(); - CTX->log()->debug("{}() Got response with mountdir {}", __func__, out.mountdir); 
+ LOG(DEBUG, "Got response with mountdir {}", out.mountdir()); - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - margo_destroy(handle); return true; } -} \ No newline at end of file +} diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 59f85b40833947648620e7eff650e2fb0ef28134..20bae82764d36e4579c141d6e52259794f207a3b 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -14,380 +14,364 @@ #include #include #include "client/preload.hpp" +#include "client/logging.hpp" #include "client/preload_util.hpp" #include "client/open_dir.hpp" #include #include #include +#include namespace rpc_send { using namespace std; -static inline hg_return_t -margo_forward_timed_wrap(const hg_handle_t& handle, void* in_struct) { - return margo_forward_timed(handle, in_struct, RPC_TIMEOUT); -} - int mk_node(const std::string& path, const mode_t mode) { - hg_handle_t handle; - rpc_mk_node_in_t in{}; - rpc_err_out_t out{}; + int err = EUNKNOWN; - // fill in - in.path = path.c_str(); - in.mode = mode; - // Create handle - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_mk_node_id, path, handle); - if (ret != HG_SUCCESS) { + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post(endp, path, mode).get().at(0); + err = out.err(); + LOG(DEBUG, "Got response success: {}", err); + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } - // Send rpc - CTX->log()->debug("{}() About to send RPC ...", __func__); - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - } else { - // something is wrong - errno = EBUSY; - CTX->log()->error("{}() while getting rpc output", __func__); - } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out", __func__); - errno = EBUSY; - } - margo_destroy(handle); + return err; } int stat(const std::string& path, string& attr) { - hg_handle_t handle; - rpc_path_only_in_t in{}; - rpc_stat_out_t out{}; - int err = 0; - // fill in - in.path = path.c_str(); - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_stat_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret != HG_SUCCESS) { - errno = EBUSY; - CTX->log()->error("{}() timed out", __func__); - margo_destroy(handle); - return -1; - } - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() while getting rpc output", __func__); - errno = EBUSY; - margo_free_output(handle, &out); - margo_destroy(handle); - return -1; - } + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that 
we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post(endp, path).get().at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); + attr = out.db_val(); + return 0; - if(out.err != 0) { - err = -1; - errno = out.err; - } else { - attr = out.db_val; + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + errno = EBUSY; + return -1; } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - margo_destroy(handle); - return err; + return 0; } int decr_size(const std::string& path, size_t length) { - hg_handle_t handle; - rpc_trunc_in_t in{}; - int err = 0; - in.path = path.c_str(); - in.length = length; - - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_decr_size_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() timed out", __func__); - margo_destroy(handle); - errno = EBUSY; - return -1; - } + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); - rpc_err_out_t out{}; - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() while getting rpc output", __func__); - margo_free_output(handle, &out); - margo_destroy(handle); + try { + + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // 
returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path, length).get().at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } + + return 0; + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } +} - CTX->log()->debug("{}() Got response: {}", __func__, out.err); +int rm_node(const std::string& path, const bool remove_metadentry_only, const ssize_t size) { - if(out.err != 0){ - //In case of error out.err contains the - //corresponding value of errno - errno = out.err; - err = -1; - } + // if only the metadentry should be removed, send one rpc to the + // metadentry's responsible node to remove the metadata + // else, send an rpc to all hosts and thus broadcast chunk_removal. + if(remove_metadentry_only) { - margo_free_output(handle, &out); - margo_destroy(handle); - return err; -} + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); -int rm_node(const std::string& path, const bool remove_metadentry_only) { - hg_return_t ret; - int err = 0; // assume we succeed - // if metadentry should only removed only, send only 1 rpc to remove the metadata - // else send an rpc to all hosts and thus broadcast chunk_removal. - auto rpc_target_size = remove_metadentry_only ? 
static_cast(1) : CTX->hosts().size(); - - CTX->log()->debug("{}() Creating Mercury handles for all nodes ...", __func__); - vector rpc_handles(rpc_target_size); - vector rpc_waiters(rpc_target_size); - vector rpc_in(rpc_target_size); - // Send rpc to all nodes as all of them can have chunks for this path - for (size_t i = 0; i < rpc_target_size; i++) { - // fill in - rpc_in[i].path = path.c_str(); - // create handle - // if only the metadentry needs to removed send one rpc to metadentry's responsible node - if (remove_metadentry_only) - ret = margo_create_wrap(rpc_rm_node_id, path, rpc_handles[i]); - else - ret = margo_create_wrap_helper(rpc_rm_node_id, i, rpc_handles[i]); - if (ret != HG_SUCCESS) { - CTX->log()->warn("{}() Unable to create Mercury handle", __func__); - // We use continue here to remove at least some data - // XXX In the future we can discuss RPC retrying. This should be a function to be used in general - errno = EBUSY; - err = -1; - } - // send async rpc - ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->warn("{}() Unable to create Mercury handle", __func__); + try { + + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post(endp, path).get().at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } + + return 0; + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; - err = -1; + return -1; } + + return 0; } - // Wait for RPC responses and then get response - for (size_t i = 0; i < rpc_target_size; i++) { - // XXX We might need a timeout here to not wait forever for an output that never comes? - ret = margo_wait(rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->warn("{}() Unable to wait for margo_request handle for path {} recipient {}", __func__, path, i); - errno = EBUSY; - err = -1; + std::vector> handles; + + // Small files + if(static_cast(size / CHUNKSIZE) < CTX->hosts().size()) { + + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + LOG(DEBUG, "Sending RPC to host: {}", endp.to_string()); + gkfs::rpc::remove::input in(path); + handles.emplace_back( + ld_network_service->post(endp,in)); + + uint64_t chnk_start = 0; + uint64_t chnk_end = size/CHUNKSIZE; + + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { + const auto target = CTX->hosts().at( + CTX->distributor()->locate_data(path, chnk_id)); + + LOG(DEBUG, "Sending RPC to host: {}", target.to_string()); + + handles.emplace_back( + ld_network_service->post(target, in)); + } + } catch (const std::exception & ex) { + LOG(ERROR, "Failed to send reduced remove requests"); + throw std::runtime_error( + "Failed to forward non-blocking rpc request"); + } + } + else { // "Big" files + for (const auto& endp : CTX->hosts()) { + try { + LOG(DEBUG, "Sending RPC to host: {}", endp.to_string()); + + gkfs::rpc::remove::input in(path); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // 
TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + // + // + + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + LOG(ERROR, "Failed to send request to host: {}", + endp.to_string()); + throw std::runtime_error( + "Failed to forward non-blocking rpc request"); + } } - rpc_err_out_t out{}; - ret = margo_get_output(rpc_handles[i], &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - if (err != 0) { - errno = out.err; - err = -1; + } + // wait for RPC responses + bool got_error = false; + + for(const auto& h : handles) { + + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + LOG(ERROR, "received error response: {}", out.err()); + got_error = true; + errno = out.err(); } - } else { - // something is wrong + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + got_error = true; errno = EBUSY; - err = -1; - CTX->log()->error("{}() while getting rpc output", __func__); } - /* clean up resources consumed by this rpc */ - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); } - return err; + + return got_error ? -1 : 0; + } int update_metadentry(const string& path, const Metadata& md, const MetadentryUpdateFlags& md_flags) { - hg_handle_t handle; - rpc_update_metadentry_in_t in{}; - rpc_err_out_t out{}; - int err = EUNKNOWN; - // fill in - // add data - in.path = path.c_str(); - in.size = md_flags.size ? md.size() : 0; - in.nlink = md_flags.link_count ? md.link_count() : 0; - in.blocks = md_flags.blocks ? 
md.blocks() : 0; - in.atime = md_flags.atime ? md.atime() : 0; - in.mtime = md_flags.mtime ? md.mtime() : 0; - in.ctime = md_flags.ctime ? md.ctime() : 0; - // add data flags - in.size_flag = bool_to_merc_bool(md_flags.size); - in.nlink_flag = bool_to_merc_bool(md_flags.link_count); - in.block_flag = bool_to_merc_bool(md_flags.blocks); - in.atime_flag = bool_to_merc_bool(md_flags.atime); - in.mtime_flag = bool_to_merc_bool(md_flags.mtime); - in.ctime_flag = bool_to_merc_bool(md_flags.ctime); - - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_update_metadentry_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - } else { - // something is wrong - errno = EBUSY; - CTX->log()->error("{}() while getting rpc output", __func__); + + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, + path, + (md_flags.link_count ? md.link_count() : 0), + /* mode */ 0, + /* uid */ 0, + /* gid */ 0, + (md_flags.size ? md.size() : 0), + (md_flags.blocks ? md.blocks() : 0), + (md_flags.atime ? md.atime() : 0), + (md_flags.mtime ? md.mtime() : 0), + (md_flags.ctime ? 
md.ctime() : 0), + bool_to_merc_bool(md_flags.link_count), + /* mode_flag */ false, + bool_to_merc_bool(md_flags.size), + bool_to_merc_bool(md_flags.blocks), + bool_to_merc_bool(md_flags.atime), + bool_to_merc_bool(md_flags.mtime), + bool_to_merc_bool(md_flags.ctime)).get().at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out", __func__); + + return 0; + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; + return -1; } - - margo_destroy(handle); - return err; } int update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag, off64_t& ret_size) { - hg_handle_t handle; - rpc_update_metadentry_size_in_t in{}; - rpc_update_metadentry_size_out_t out{}; - // add data - in.path = path.c_str(); - in.size = size; - in.offset = offset; - if (append_flag) - in.append = HG_TRUE; - else - in.append = HG_FALSE; - int err = EUNKNOWN; - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_update_metadentry_size_id, path, handle); - if (ret != HG_SUCCESS) { - ret_size = 0; + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path, size, offset, + bool_to_merc_bool(append_flag)).get().at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } + + ret_size = out.ret_size(); + return out.err(); + + return 0; + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; - margo_destroy(handle); - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() margo forward failed: {}", __func__, HG_Error_to_string(ret)); ret_size = 0; - errno = EBUSY; - margo_destroy(handle); - return -1; + return EUNKNOWN; } +} - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() failed to get rpc ouptut: {}", __func__, HG_Error_to_string(ret)); - ret_size = 0; - errno = EBUSY; - margo_free_output(handle, &out); - margo_destroy(handle); - } +int get_metadentry_size(const std::string& path, off64_t& ret_size) { - CTX->log()->debug("{}() Got response: {}", __func__, out.err); - err = out.err; - ret_size = out.ret_size; + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); - margo_free_output(handle, &out); - margo_destroy(handle); - return err; -} + try { -int get_metadentry_size(const std::string& path, off64_t& ret_size) { - hg_handle_t handle; - rpc_path_only_in_t in{}; - rpc_get_metadentry_size_out_t out{}; - // add data - in.path = path.c_str(); - int err = EUNKNOWN; + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path).get().at(0); - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_get_metadentry_size_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - ret_size = out.ret_size; - } else { - // something is wrong - errno = EBUSY; - ret_size = 0; - CTX->log()->error("{}() while getting rpc output", __func__); - } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out", __func__); + LOG(DEBUG, "Got response success: {}", out.err()); + + ret_size = out.ret_size(); + return out.err(); + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; + ret_size = 0; + return EUNKNOWN; } - margo_destroy(handle); - return err; } /** * Sends an RPC request to a specific node to push all chunks that belong to him */ void get_dirents(OpenDir& open_dir){ - CTX->log()->trace("{}() called", __func__); + auto const root_dir = open_dir.path(); - auto const targets = CTX->distributor()->locate_directory_metadata(root_dir); - auto const host_size = targets.size(); - std::vector rpc_handles(host_size); - std::vector rpc_waiters(host_size); - std::vector rpc_in(host_size); - std::vector recv_buffers(host_size); + auto const targets = + CTX->distributor()->locate_directory_metadata(root_dir); /* preallocate receiving buffer. The actual size is not known yet. * @@ -395,126 +379,144 @@ void get_dirents(OpenDir& open_dir){ * It turns out that this operation is increadibly slow for such a big * buffer. 
Moreover we don't need a zeroed buffer here. */ - auto recv_buff = std::unique_ptr(new char[RPC_DIRENTS_BUFF_SIZE]); - const unsigned long int per_host_buff_size = RPC_DIRENTS_BUFF_SIZE / host_size; + auto large_buffer = + std::unique_ptr(new char[RPC_DIRENTS_BUFF_SIZE]); + + //XXX there is a rounding error here depending on the number of targets... + const std::size_t per_host_buff_size = + RPC_DIRENTS_BUFF_SIZE / targets.size(); + + // expose local buffers for RMA from servers + std::vector exposed_buffers; + exposed_buffers.reserve(targets.size()); + + for(std::size_t i = 0; i < targets.size(); ++i) { + try { + exposed_buffers.emplace_back( + ld_network_service->expose( + std::vector{ + hermes::mutable_buffer{ + large_buffer.get() + (i * per_host_buff_size), + per_host_buff_size + } + }, + hermes::access_mode::write_only)); + } catch (const std::exception& ex) { + throw std::runtime_error("Failed to expose buffers for RMA"); + } + } - hg_return_t hg_ret; + // send RPCs + std::vector> handles; - for(const auto& target_host: targets){ + for(std::size_t i = 0; i < targets.size(); ++i) { - CTX->log()->trace("{}() target_host: {}", __func__, target_host); - //Setup rpc input parameters for each host - rpc_in[target_host].path = root_dir.c_str(); - recv_buffers[target_host] = recv_buff.get() + (target_host * per_host_buff_size); + LOG(DEBUG, "target_host: {}", targets[i]); - hg_ret = margo_bulk_create( - ld_margo_rpc_id, 1, - reinterpret_cast(&recv_buffers[target_host]), - &per_host_buff_size, - HG_BULK_WRITE_ONLY, &(rpc_in[target_host].bulk_handle)); - if(hg_ret != HG_SUCCESS){ - throw std::runtime_error("Failed to create margo bulk handle"); - } + // Setup rpc input parameters for each host + auto endp = CTX->hosts().at(targets[i]); - hg_ret = margo_create_wrap_helper(rpc_get_dirents_id, target_host, rpc_handles[target_host]); - if (hg_ret != HG_SUCCESS) { - std::runtime_error("Failed to create margo handle"); - } - // Send RPC - CTX->log()->trace("{}() Sending RPC 
to host: {}", __func__, target_host); - hg_ret = margo_iforward(rpc_handles[target_host], - &rpc_in[target_host], - &rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking get_dirents on {} to recipient {}", __func__, root_dir, target_host); - for (uint64_t i = 0; i <= target_host; i++) { - margo_bulk_free(rpc_in[i].bulk_handle); - margo_destroy(rpc_handles[i]); - } - throw std::runtime_error("Failed to forward non-blocking rpc request"); + gkfs::rpc::get_dirents::input in(root_dir, exposed_buffers[i]); + + try { + + LOG(DEBUG, "Sending RPC to host: {}", targets[i]); + handles.emplace_back( + ld_network_service->post(endp, in)); + } catch(const std::exception& ex) { + LOG(ERROR, "Unable to send non-blocking get_dirents() " + "on {} [peer: {}]", root_dir, targets[i]); + throw std::runtime_error("Failed to post non-blocking RPC request"); } } - for(unsigned int target_host = 0; target_host < host_size; target_host++){ - hg_ret = margo_wait(rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed while waiting for rpc completion. [root dir: {}, target host: {}]", root_dir, target_host)); - } - rpc_get_dirents_out_t out{}; - hg_ret = margo_get_output(rpc_handles[target_host], &out); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed to get rpc output.. [path: {}, target host: {}]", root_dir, target_host)); - } + // wait for RPC responses + for(std::size_t i = 0; i < handles.size(); ++i) { + + gkfs::rpc::get_dirents::output out; - if (out.err) { - CTX->log()->error("{}() Sending RPC to host: {}", __func__, target_host); - throw std::runtime_error(fmt::format("Failed to retrieve dir entries from host '{}'. " - "Error '{}', path '{}'", target_host, strerror(out.err), root_dir)); + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? 
+ out = handles[i].get().at(0); + + if(out.err() != 0) { + throw std::runtime_error( + fmt::format("Failed to retrieve dir entries from " + "host '{}'. Error '{}', path '{}'", + targets[i], strerror(out.err()), root_dir)); + } + } catch(const std::exception& ex) { + throw std::runtime_error( + fmt::format("Failed to get rpc output.. [path: {}, " + "target host: {}]", root_dir, targets[i])); } - bool* bool_ptr = reinterpret_cast(recv_buffers[target_host]); - char* names_ptr = recv_buffers[target_host] + (out.dirents_size * sizeof(bool)); - for(unsigned int i = 0; i < out.dirents_size; i++){ + // each server wrote information to its pre-defined region in + // large_buffer, recover it by computing the base_address for each + // particular server and adding the appropriate offsets + assert(exposed_buffers[i].count() == 1); + void* base_ptr = exposed_buffers[i].begin()->data(); - FileType ftype = (*bool_ptr)? FileType::directory : FileType::regular; + bool* bool_ptr = reinterpret_cast(base_ptr); + char* names_ptr = reinterpret_cast(base_ptr) + + (out.dirents_size() * sizeof(bool)); + + for(std::size_t j = 0; j < out.dirents_size(); j++) { + + FileType ftype = (*bool_ptr) ? 
+ FileType::directory : + FileType::regular; bool_ptr++; - //Check that we are not outside the recv_buff for this specific host - assert((names_ptr - recv_buffers[target_host]) > 0); - assert(static_cast(names_ptr - recv_buffers[target_host]) < per_host_buff_size); + // Check that we are not outside the recv_buff for this specific host + assert((names_ptr - reinterpret_cast(base_ptr)) > 0); + assert( + static_cast( + names_ptr - reinterpret_cast(base_ptr)) < + per_host_buff_size); auto name = std::string(names_ptr); names_ptr += name.size() + 1; open_dir.add(name, ftype); } - - margo_free_output(rpc_handles[target_host], &out); - margo_bulk_free(rpc_in[target_host].bulk_handle); - margo_destroy(rpc_handles[target_host]); } } #ifdef HAS_SYMLINKS int mk_symlink(const std::string& path, const std::string& target_path) { - hg_handle_t handle; - rpc_mk_symlink_in_t in{}; - rpc_err_out_t out{}; - int err = EUNKNOWN; - // fill in - in.path = path.c_str(); - in.target_path = target_path.c_str(); - // Create handle - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_mk_symlink_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - CTX->log()->debug("{}() About to send RPC ...", __func__); - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - } else { - // something is wrong - errno = EBUSY; - CTX->log()->error("{}() while getting rpc output", __func__); + + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): 
hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path, target_path).get().at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out"); + + return 0; + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); errno = EBUSY; + return -1; } - margo_destroy(handle); - return err; } #endif diff --git a/src/client/syscalls/detail/syscall_info.c b/src/client/syscalls/detail/syscall_info.c new file mode 100644 index 0000000000000000000000000000000000000000..9f7008ebce67d7bbc5369ff2c27988d3de93a54f --- /dev/null +++ b/src/client/syscalls/detail/syscall_info.c @@ -0,0 +1,919 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define SYSCALL(id, nargs, ret, ...) 
\ + [SYS_##id] = \ +{ \ + .s_nr = SYS_##id, \ + .s_name = #id, \ + .s_nargs = nargs, \ + .s_return_type = ret, \ + .s_args = {__VA_ARGS__} \ +} + +#define S_NOARGS() {0} + +#define S_UARG(t) \ +{ \ + .a_type = t, \ + .a_name = #t \ +} + +#define S_NARG(t, n) \ +{ \ + .a_type = t, \ + .a_name = n \ +} + +#define S_RET(t) \ +{ \ + .r_type = t \ +} + +/* Linux syscalls on x86_64 */ +const struct syscall_info syscall_table[] = { + SYSCALL(read, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count")), + SYSCALL(write, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count")), + SYSCALL(open, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags")), + SYSCALL(close, 1, S_RET(rdec), S_UARG(fd)), + SYSCALL(stat, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "statbuf")), + SYSCALL(fstat, 2, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "statbuf")), + SYSCALL(lstat, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "statbuf")), + SYSCALL(poll, 3, S_RET(rdec), S_NARG(ptr, "fds"), S_NARG(dec, "nfds"), S_NARG(dec, "timeout")), + SYSCALL(lseek, 3, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_UARG(whence)), + SYSCALL(mmap, 6, S_RET(rptr), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot"), S_NARG(mmap_flags, "flags"), S_UARG(fd), S_UARG(offset)), + SYSCALL(mprotect, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot")), + SYSCALL(munmap, 2, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length")), + SYSCALL(brk, 1, S_RET(rdec), S_NARG(ptr, "addr")), + SYSCALL(rt_sigaction, 4, S_RET(rdec), S_NARG(signum, "signum"), S_NARG(ptr, "act"), S_NARG(ptr, "oldact"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigprocmask, 4, S_RET(rdec), S_NARG(sigproc_how, "how"), S_NARG(ptr, "set"), S_NARG(ptr, "oldset"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigreturn, 0, S_RET(rnone), S_NOARGS()), + SYSCALL(ioctl, 3, S_RET(rdec), S_UARG(fd), S_NARG(arg, "cmd"), S_NARG(arg, "argp")), + SYSCALL(pread64, 4, 
S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_UARG(offset)), + SYSCALL(pwrite64, 4, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_UARG(offset)), + SYSCALL(readv, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt")), + SYSCALL(writev, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt")), + SYSCALL(access, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(pipe, 1, S_RET(rdec), S_NARG(ptr, "pipefd")), + SYSCALL(select, 5, S_RET(rdec), S_NARG(dec, "nfds"), S_NARG(ptr, "readfds"), S_NARG(ptr, "writefds"), S_NARG(ptr, "exceptfds"), S_NARG(ptr, "timeout")), + SYSCALL(sched_yield, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(mremap, 5, S_RET(rdec), S_NARG(ptr, "old_address"), S_NARG(dec, "old_size"), S_NARG(dec, "new_size"), S_NARG(arg, "flags"), S_NARG(ptr, "new_addr")), + SYSCALL(msync, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(mincore, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(ptr, "vec")), + SYSCALL(madvise, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "behavior")), + SYSCALL(shmget, 3, S_RET(rdec), S_NARG(arg, "key"), S_NARG(dec, "size"), S_NARG(arg, "flag")), + SYSCALL(shmat, 3, S_RET(rdec), S_NARG(arg, "shmid"), S_NARG(ptr, "shmaddr"), S_NARG(arg, "shmflg")), + SYSCALL(shmctl, 3, S_RET(rdec), S_NARG(arg, "shmid"), S_NARG(arg, "cmd"), S_NARG(ptr, "buf")), + SYSCALL(dup, 1, S_RET(rdec), S_NARG(fd, "oldfd")), + SYSCALL(dup2, 2, S_RET(rdec), S_NARG(fd, "oldfd"), S_NARG(fd, "newfd")), + SYSCALL(pause, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(nanosleep, 2, S_RET(rdec), S_NARG(ptr, "rqtp"), S_NARG(ptr, "rmtp")), + SYSCALL(getitimer, 2, S_RET(rdec), S_NARG(arg, "which"), S_NARG(ptr, "value")), + SYSCALL(alarm, 1, S_RET(rdec), S_NARG(dec, "seconds")), + SYSCALL(setitimer, 3, S_RET(rdec), S_NARG(arg, "which"), S_NARG(ptr, "value"), S_NARG(ptr, "ovalue")), + 
SYSCALL(getpid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(sendfile, 4, S_RET(rdec), S_NARG(fd, "out_fd"), S_NARG(fd, "in_fd"), S_NARG(ptr, "offset"), S_NARG(arg, "count")), + SYSCALL(socket, 3, S_RET(rdec), S_NARG(arg, "domain"), S_NARG(arg, "type"), S_NARG(arg, "protocol")), + SYSCALL(connect, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(arg, "addrlen")), + SYSCALL(accept, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(sendto, 5, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "dest_addr"), S_NARG(arg, "len"), S_NARG(ptr, "addr"), S_NARG(arg, "addrlen")), + SYSCALL(recvfrom, 5, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "src_addr"), S_NARG(arg, "len"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(sendmsg, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "msg"), S_NARG(arg, "flags")), + SYSCALL(recvmsg, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "msg"), S_NARG(arg, "flags")), + SYSCALL(shutdown, 2, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(arg, "how")), + SYSCALL(bind, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(arg, "addrlen")), + SYSCALL(listen, 2, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(arg, "backlog")), + SYSCALL(getsockname, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(getpeername, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(socketpair, 4, S_RET(rdec), S_NARG(arg, "domain"), S_NARG(arg, "type"), S_NARG(arg, "protocol"), S_NARG(ptr, "sv")), + SYSCALL(setsockopt, 5, S_RET(rdec), S_UARG(fd), S_NARG(arg, "level"), S_NARG(arg, "optname"), S_NARG(ptr, "optval"), S_NARG(arg, "optlen")), + SYSCALL(getsockopt, 5, S_RET(rdec), S_UARG(fd), S_NARG(arg, "level"), S_NARG(arg, "optname"), S_NARG(ptr, "optval"), S_NARG(ptr, "optlen")), + SYSCALL(clone, 5, S_RET(rdec), S_NARG(clone_flags, "flags"), S_NARG(ptr, "child_stack"), S_NARG(ptr, "ptid"), S_NARG(ptr, "ctid"), 
S_NARG(ptr, "newtls")), + SYSCALL(fork, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(vfork, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(execve, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "argv"), S_NARG(ptr, "envp")), + SYSCALL(exit, 1, S_RET(rnone), S_NARG(dec, "status")), + SYSCALL(wait4, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "stat_addr"), S_NARG(arg, "options"), S_NARG(ptr, "rusage")), + SYSCALL(kill, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(signum, "sig")), + SYSCALL(uname, 1, S_RET(rdec), S_NARG(ptr, "buf")), + SYSCALL(semget, 3, S_RET(rdec), S_NARG(arg, "key"), S_NARG(dec, "nsems"), S_NARG(arg, "semflg")), + SYSCALL(semop, 3, S_RET(rdec), S_NARG(dec, "semid"), S_NARG(ptr, "sops"), S_NARG(arg, "nsops")), + SYSCALL(semctl, 4, S_RET(rdec), S_NARG(dec, "semid"), S_NARG(dec, "semnum"), S_NARG(arg, "cmd"), S_NARG(arg, "arg")), + SYSCALL(shmdt, 1, S_RET(rdec), S_NARG(ptr, "shmaddr")), + SYSCALL(msgget, 2, S_RET(rdec), S_NARG(arg, "key"), S_NARG(arg, "msflg")), + SYSCALL(msgsnd, 4, S_RET(rdec), S_NARG(arg, "msqid"), S_NARG(ptr, "msgp"), S_NARG(dec, "msgsz"), S_NARG(arg, "msflg")), + SYSCALL(msgrcv, 5, S_RET(rdec), S_NARG(arg, "msqid"), S_NARG(ptr, "msgp"), S_NARG(dec, "msgsz"), S_NARG(arg, "msgtyp"), S_NARG(arg, "msflg")), + SYSCALL(msgctl, 3, S_RET(rdec), S_NARG(arg, "msqid"), S_NARG(arg, "cmd"), S_NARG(ptr, "buf")), + SYSCALL(fcntl, 3, S_RET(rdec), S_UARG(fd), S_NARG(arg, "cmd"), S_NARG(arg, "arg")), + SYSCALL(flock, 2, S_RET(rdec), S_UARG(fd), S_NARG(arg, "cmd")), + SYSCALL(fsync, 1, S_RET(rdec), S_UARG(fd)), + SYSCALL(fdatasync, 2, S_RET(rdec), S_UARG(fd)), + SYSCALL(truncate, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, "length")), + SYSCALL(ftruncate, 2, S_RET(rdec), S_UARG(fd), S_NARG(offset, "length")), + SYSCALL(getdents, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "dirent"), S_NARG(arg, "count")), + SYSCALL(getcwd, 2, S_RET(rdec), S_NARG(ptr, "buf"), S_NARG(dec, "size")), + SYSCALL(chdir, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + 
SYSCALL(fchdir, 1, S_RET(rdec), S_UARG(fd)), + SYSCALL(rename, 2, S_RET(rdec), S_NARG(cstr, "oldpath"), S_NARG(cstr, "newpath")), + SYSCALL(mkdir, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(rmdir, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(creat, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(link, 2, S_RET(rdec), S_NARG(cstr, "oldpath"), S_NARG(cstr, "newpath")), + SYSCALL(unlink, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(symlink, 2, S_RET(rdec), S_NARG(cstr, "target"), S_NARG(cstr, "linkpath")), + SYSCALL(readlink, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsiz")), + SYSCALL(chmod, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(fchmod, 2, S_RET(rdec), S_UARG(fd), S_NARG(octal_mode, "mode")), + SYSCALL(chown, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(dec, "user"), S_NARG(dec, "group")), + SYSCALL(fchown, 3, S_RET(rdec), S_UARG(fd), S_NARG(dec, "user"), S_NARG(dec, "group")), + SYSCALL(lchown, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(dec, "user"), S_NARG(dec, "group")), + SYSCALL(umask, 1, S_RET(rdec), S_NARG(arg, "mask")), + SYSCALL(gettimeofday, 2, S_RET(rdec), S_NARG(ptr, "tv"), S_NARG(ptr, "tz")), + SYSCALL(getrlimit, 2, S_RET(rdec), S_NARG(arg, "resource"), S_NARG(ptr, "rlim")), + SYSCALL(getrusage, 2, S_RET(rdec), S_NARG(arg, "who"), S_NARG(ptr, "ru")), + SYSCALL(sysinfo, 1, S_RET(rdec), S_NARG(ptr, "info")), + SYSCALL(times, 1, S_RET(rdec), S_NARG(ptr, "tbuf")), + SYSCALL(ptrace, 4, S_RET(rdec), S_NARG(arg, "request"), S_NARG(dec, "pid"), S_NARG(ptr, "addr"), S_NARG(ptr, "data")), + SYSCALL(getuid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(syslog, 3, S_RET(rdec), S_NARG(arg, "type"), S_NARG(ptr, "buf"), S_NARG(arg, "length")), + SYSCALL(getgid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setuid, 1, S_RET(rdec), S_NARG(dec, "uid")), + SYSCALL(setgid, 1, S_RET(rdec), S_NARG(dec, "gid")), + 
SYSCALL(geteuid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(getegid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setpgid, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(dec, "pgid")), + SYSCALL(getppid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(getpgrp, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setsid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setreuid, 2, S_RET(rdec), S_NARG(dec, "ruid"), S_NARG(dec, "euid")), + SYSCALL(setregid, 2, S_RET(rdec), S_NARG(dec, "rgid"), S_NARG(dec, "egid")), + SYSCALL(getgroups, 2, S_RET(rdec), S_NARG(arg, "gidsetsize"), S_NARG(ptr, "grouplist")), + SYSCALL(setgroups, 2, S_RET(rdec), S_NARG(arg, "gidsetsize"), S_NARG(ptr, "grouplist")), + SYSCALL(setresuid, 3, S_RET(rdec), S_NARG(dec, "ruid"), S_NARG(dec, "euid"), S_NARG(dec, "suid")), + SYSCALL(getresuid, 3, S_RET(rdec), S_NARG(ptr, "ruid"), S_NARG(ptr, "euid"), S_NARG(ptr, "suid")), + SYSCALL(setresgid, 3, S_RET(rdec), S_NARG(dec, "rgid"), S_NARG(dec, "egid"), S_NARG(dec, "sgid")), + SYSCALL(getresgid, 3, S_RET(rdec), S_NARG(ptr, "rgid"), S_NARG(ptr, "egid"), S_NARG(ptr, "sgid")), + SYSCALL(getpgid, 1, S_RET(rdec), S_NARG(dec, "pid")), + SYSCALL(setfsuid, 1, S_RET(rdec), S_NARG(dec, "uid")), + SYSCALL(setfsgid, 1, S_RET(rdec), S_NARG(dec, "gid")), + SYSCALL(getsid, 1, S_RET(rdec), S_NARG(dec, "pid")), + SYSCALL(capget, 2, S_RET(rdec), S_NARG(ptr, "header"), S_NARG(ptr, "datap")), + SYSCALL(capset, 2, S_RET(rdec), S_NARG(ptr, "header"), S_NARG(ptr, "datap")), + SYSCALL(rt_sigpending, 2, S_RET(rdec), S_NARG(ptr, "set"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigtimedwait, 4, S_RET(rdec), S_NARG(ptr, "uthese"), S_NARG(ptr, "uinfo"), S_NARG(ptr, "uts"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigqueueinfo, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(signum, "sig"), S_NARG(ptr, "uinfo")), + SYSCALL(rt_sigsuspend, 2, S_RET(rdec), S_NARG(ptr, "unewset"), S_NARG(dec, "sigsetsize")), + SYSCALL(sigaltstack, 2, S_RET(rdec), S_NARG(ptr, "ss"), S_NARG(ptr, "old_ss")), + SYSCALL(utime, 2, S_RET(rdec), S_NARG(cstr, 
"filename"), S_NARG(ptr, "times")), + SYSCALL(mknod, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode"), S_NARG(arg, "dev")), + SYSCALL(uselib, 1, S_RET(rdec), S_NARG(cstr, "library")), + SYSCALL(personality, 1, S_RET(rdec), S_NARG(arg, "personality")), + SYSCALL(ustat, 2, S_RET(rdec), S_NARG(arg, "dev"), S_NARG(ptr, "ubuf")), + SYSCALL(statfs, 2, S_RET(rdec), S_NARG(cstr, "path"), S_NARG(ptr, "buf")), + SYSCALL(fstatfs, 2, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf")), + SYSCALL(sysfs, 3, S_RET(rdec), S_NARG(arg, "option"), S_NARG(ptr, "arg1"), S_NARG(ptr, "arg2")), + SYSCALL(getpriority, 2, S_RET(rdec), S_NARG(arg, "which"), S_NARG(arg, "who")), + SYSCALL(setpriority, 3, S_RET(rdec), S_NARG(arg, "which"), S_NARG(arg, "who"), S_NARG(arg, "prio")), + SYSCALL(sched_setparam, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "param")), + SYSCALL(sched_getparam, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "param")), + SYSCALL(sched_setscheduler, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "policy"), S_NARG(ptr, "param")), + SYSCALL(sched_getscheduler, 1, S_RET(rdec), S_NARG(dec, "pid")), + SYSCALL(sched_get_priority_max, 1, S_RET(rdec), S_NARG(arg, "policy")), + SYSCALL(sched_get_priority_min, 1, S_RET(rdec), S_NARG(arg, "policy")), + SYSCALL(sched_rr_get_interval, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "interval")), + SYSCALL(mlock, 2, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length")), + SYSCALL(munlock, 2, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length")), + SYSCALL(mlockall, 1, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(munlockall, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(vhangup, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(modify_ldt, 3, S_RET(rdec), S_NARG(arg, "func"), S_NARG(ptr, "ptr"), S_NARG(arg, "bytecount")), + SYSCALL(pivot_root, 2, S_RET(rdec), S_NARG(cstr, "new_root"), S_NARG(cstr, "put_old")), + SYSCALL(_sysctl, 1, S_RET(rdec), S_NARG(ptr, "args")), + SYSCALL(prctl, 5, S_RET(rdec), S_NARG(arg, "option"), 
S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4"), S_NARG(arg, "arg5")), + SYSCALL(arch_prctl, 2, S_RET(rdec), S_NARG(arg, "code"), S_NARG(arg, "addr")), + SYSCALL(adjtimex, 1, S_RET(rdec), S_NARG(ptr, "txc_p")), + SYSCALL(setrlimit, 2, S_RET(rdec), S_NARG(arg, "resource"), S_NARG(ptr, "rlim")), + SYSCALL(chroot, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(sync, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(acct, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(settimeofday, 2, S_RET(rdec), S_NARG(ptr, "tv"), S_NARG(ptr, "tz")), + SYSCALL(mount, 5, S_RET(rdec), S_NARG(cstr, "dev_name"), S_NARG(cstr, "dir_name"), S_NARG(cstr, "type"), S_NARG(arg, "flags"), S_NARG(ptr, "data")), + SYSCALL(umount2, 2, S_RET(rdec), S_NARG(cstr, "target"), S_NARG(arg, "flags")), + SYSCALL(swapon, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, "swap_flags")), + SYSCALL(swapoff, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(reboot, 4, S_RET(rdec), S_NARG(arg, "magic1"), S_NARG(arg, "magic2"), S_NARG(arg, "cmd"), S_NARG(ptr, "arg")), + SYSCALL(sethostname, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "length")), + SYSCALL(setdomainname, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "length")), + SYSCALL(iopl, 1, S_RET(rdec), S_NARG(arg, "level")), + SYSCALL(ioperm, 3, S_RET(rdec), S_NARG(arg, "from"), S_NARG(arg, "num"), S_NARG(arg, "on")), + SYSCALL(create_module, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "size")), + SYSCALL(init_module, 3, S_RET(rdec), S_NARG(ptr, "module_image"), S_NARG(dec, "length"), S_NARG(cstr, "param_values")), + SYSCALL(delete_module, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "flags")), + SYSCALL(get_kernel_syms, 1, S_RET(rdec), S_NARG(ptr, "table")), + SYSCALL(query_module, 5, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "which"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsize"), S_NARG(ptr, "ret")), + SYSCALL(quotactl, 4, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(cstr, "special"), S_NARG(arg, "id"), S_NARG(ptr, 
"addr")), + SYSCALL(nfsservctl, 3, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(ptr, "argp"), S_NARG(ptr, "resp")), + SYSCALL(getpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4")), + SYSCALL(putpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4")), + SYSCALL(afs_syscall, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4")), + SYSCALL(tuxcall, 3, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2")), + SYSCALL(security, 3, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2")), + SYSCALL(gettid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(readahead, 3, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_NARG(arg, "count")), + SYSCALL(setxattr, 5, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(lsetxattr, 5, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(fsetxattr, 5, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(getxattr, 4, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size")), + SYSCALL(lgetxattr, 4, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size")), + SYSCALL(fgetxattr, 4, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size")), + SYSCALL(listxattr, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "list"), S_NARG(dec, "size")), + SYSCALL(llistxattr, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "list"), S_NARG(dec, "size")), + SYSCALL(flistxattr, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "list"), 
S_NARG(dec, "size")), + SYSCALL(removexattr, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname")), + SYSCALL(lremovexattr, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname")), + SYSCALL(fremovexattr, 2, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname")), + SYSCALL(tkill, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(signum, "sig")), + SYSCALL(time, 1, S_RET(rdec), S_NARG(ptr, "tloc")), + SYSCALL(futex, 6, S_RET(rdec), S_NARG(ptr, "uaddr"), S_NARG(arg, "op"), S_NARG(arg, "val"), S_NARG(ptr, "utime"), S_NARG(ptr, "uaddr2"), S_NARG(arg, "val3")), + SYSCALL(sched_setaffinity, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "length"), S_NARG(ptr, "mask")), + SYSCALL(sched_getaffinity, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "length"), S_NARG(ptr, "mask")), + SYSCALL(set_thread_area, 1, S_RET(rdec), S_NARG(ptr, "u_info")), + SYSCALL(io_setup, 2, S_RET(rdec), S_NARG(dec, "nr_reqs"), S_NARG(ptr, "ctx")), + SYSCALL(io_destroy, 1, S_RET(rdec), S_NARG(ptr, "ctx")), + SYSCALL(io_getevents, 5, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(dec, "min_nr"), S_NARG(dec, "nr"), S_NARG(ptr, "events"), S_NARG(ptr, "timeout")), + SYSCALL(io_submit, 3, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(dec, "nr"), S_NARG(ptr, "iocbpp")), + SYSCALL(io_cancel, 3, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(ptr, "iocb"), S_NARG(ptr, "result")), + SYSCALL(get_thread_area, 1, S_RET(rdec), S_NARG(ptr, "u_info")), + SYSCALL(lookup_dcookie, 3, S_RET(rdec), S_NARG(arg, "cookie64"), S_NARG(ptr, "buf"), S_NARG(dec, "length")), + SYSCALL(epoll_create, 3, S_RET(rdec), S_NARG(arg, "size")), + SYSCALL(epoll_ctl_old, 4, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3")), + SYSCALL(epoll_wait_old, 4, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3")), + SYSCALL(remap_file_pages, 5, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "size"), S_NARG(mmap_prot, "prot"), S_NARG(dec, "pgoff"), 
S_NARG(arg, "flags")), + SYSCALL(getdents64, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "dirent"), S_NARG(arg, "count")), + SYSCALL(set_tid_address, 1, S_RET(rdec), S_NARG(ptr, "tidptr")), + SYSCALL(restart_syscall, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(semtimedop, 4, S_RET(rdec), S_NARG(dec, "semid"), S_NARG(ptr, "sops"), S_NARG(arg, "nsops"), S_NARG(ptr, "timeout")), + SYSCALL(fadvise64, 4, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_NARG(dec, "length"), S_NARG(arg, "advice")), + SYSCALL(timer_create, 3, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "timer_event_spec"), S_NARG(ptr, "created_timer_id")), + SYSCALL(timer_settime, 4, S_RET(rdec), S_NARG(arg, "timer_id"), S_NARG(arg, "flags"), S_NARG(ptr, "new_setting"), S_NARG(ptr, "old_setting")), + SYSCALL(timer_gettime, 2, S_RET(rdec), S_NARG(arg, "timer_id"), S_NARG(ptr, "setting")), + SYSCALL(timer_getoverrun, 1, S_RET(rdec), S_NARG(arg, "timer_id")), + SYSCALL(timer_delete, 1, S_RET(rdec), S_NARG(arg, "timer_id")), + SYSCALL(clock_settime, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tp")), + SYSCALL(clock_gettime, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tp")), + SYSCALL(clock_getres, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tp")), + SYSCALL(clock_nanosleep, 4, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(arg, "flags"), S_NARG(ptr, "rqtp"), S_NARG(ptr, "rmtp")), + SYSCALL(exit_group, 1, S_RET(rnone), S_NARG(dec, "status")), + SYSCALL(epoll_wait, 4, S_RET(rdec), S_NARG(dec, "epfd"), S_NARG(ptr, "events"), S_NARG(dec, "maxevents"), S_NARG(dec32, "timeout")), + SYSCALL(epoll_ctl, 4, S_RET(rdec), S_NARG(dec, "epfd"), S_NARG(arg, "op"), S_UARG(fd), S_NARG(ptr, "event")), + SYSCALL(tgkill, 3, S_RET(rdec), S_NARG(arg, "tgid"), S_NARG(dec, "pid"), S_NARG(signum, "sig")), + SYSCALL(utimes, 2, S_RET(rdec), S_NARG(cstr, "filename"), S_NARG(ptr, "utimes")), + SYSCALL(vserver, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, 
"arg3"), S_NARG(arg, "arg4")), + SYSCALL(mbind, 6, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(octal_mode, "mode"), S_NARG(ptr, "nmask"), S_NARG(arg, "maxnode"), S_NARG(arg, "flags")), + SYSCALL(set_mempolicy, 3, S_RET(rdec), S_NARG(octal_mode, "mode"), S_NARG(ptr, "nmask"), S_NARG(arg, "maxnode")), + SYSCALL(get_mempolicy, 5, S_RET(rdec), S_NARG(ptr, "policy"), S_NARG(ptr, "nmask"), S_NARG(arg, "maxnode"), S_NARG(ptr, "addr"), S_NARG(arg, "flags")), + SYSCALL(mq_open, 4, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(open_flags, "oflag"), S_NARG(octal_mode, "mode"), S_NARG(ptr, "attr")), + SYSCALL(mq_unlink, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(mq_timedsend, 5, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(cstr, "msg_ptr"), S_NARG(dec, "msg_len"), S_NARG(arg, "msg_prio"), S_NARG(ptr, "abs_timeout")), + SYSCALL(mq_timedreceive, 5, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(ptr, "msg_ptr"), S_NARG(dec, "msg_len"), S_NARG(ptr, "msg_prio"), S_NARG(ptr, "abs_timeout")), + SYSCALL(mq_notify, 2, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(ptr, "notification")), + SYSCALL(mq_getsetattr, 3, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(ptr, "mqstat"), S_NARG(ptr, "omqstat")), + SYSCALL(kexec_load, 4, S_RET(rdec), S_NARG(arg, "entry"), S_NARG(arg, "nr_segments"), S_NARG(ptr, "segments"), S_NARG(arg, "flags")), + SYSCALL(waitid, 5, S_RET(rdec), S_NARG(arg, "which"), S_NARG(dec, "pid"), S_NARG(ptr, "infop"), S_NARG(arg, "options"), S_NARG(ptr, "ru")), + SYSCALL(add_key, 5, S_RET(rdec), S_NARG(cstr, "type"), S_NARG(cstr, "description"), S_NARG(ptr, "payload"), S_NARG(dec, "plen"), S_NARG(arg, "destringid")), + SYSCALL(request_key, 4, S_RET(rdec), S_NARG(cstr, "type"), S_NARG(cstr, "description"), S_NARG(cstr, "callout_info"), S_NARG(arg, "destringid")), + SYSCALL(keyctl, 5, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4"), S_NARG(arg, "arg5")), + SYSCALL(ioprio_set, 3, S_RET(rdec), S_NARG(arg, "which"), 
S_NARG(arg, "who"), S_NARG(dec, "ioprio")), + SYSCALL(ioprio_get, 2, S_RET(rdec), S_NARG(arg, "which"), S_NARG(arg, "who")), + SYSCALL(inotify_init, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(inotify_add_watch, 3, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname"), S_NARG(arg, "mask")), + SYSCALL(inotify_rm_watch, 2, S_RET(rdec), S_UARG(fd), S_NARG(dec, "wd")), + SYSCALL(migrate_pages, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "maxnode"), S_NARG(ptr, "from"), S_NARG(ptr, "to")), + SYSCALL(openat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags")), + SYSCALL(mkdirat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(mknodat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "filename"), S_NARG(octal_mode, "mode"), S_NARG(arg, "dev")), + SYSCALL(fchownat, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(dec, "user"), S_NARG(dec, "group"), S_NARG(arg, "flag")), + SYSCALL(futimesat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "utimes")), + SYSCALL(newfstatat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "statbuf"), S_NARG(arg, "flag")), + SYSCALL(unlinkat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(arg, "flag")), + SYSCALL(renameat, 4, S_RET(rdec), S_NARG(atfd, "olddfd"), S_NARG(cstr, "oldname"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newname")), + SYSCALL(linkat, 5, S_RET(rdec), S_NARG(atfd, "olddfd"), S_NARG(cstr, "oldpath"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newpath"), S_NARG(arg, "flags")), + SYSCALL(symlinkat, 3, S_RET(rdec), S_NARG(cstr, "oldname"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newname")), + SYSCALL(readlinkat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsiz")), + SYSCALL(fchmodat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "filename"), S_NARG(octal_mode, "mode")), + SYSCALL(faccessat, 3, 
S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(pselect6, 6, S_RET(rdec), S_NARG(dec, "nfds"), S_NARG(ptr, "readfds"), S_NARG(ptr, "writefds"), S_NARG(ptr, "exceptfds"), S_NARG(ptr, "timeval"), S_NARG(ptr, "sigmask")), + SYSCALL(ppoll, 5, S_RET(rdec), S_NARG(ptr, "fds"), S_NARG(dec, "nfds"), S_NARG(ptr, "tmo_p"), S_NARG(ptr, "sigmask"), S_NARG(dec, "sigsetsize")), + SYSCALL(unshare, 1, S_RET(rdec), S_NARG(arg, "unshare_flags")), + SYSCALL(set_robust_list, 2, S_RET(rdec), S_NARG(ptr, "head"), S_NARG(dec, "length")), + SYSCALL(get_robust_list, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "head_ptr"), S_NARG(ptr, "len_ptr")), + SYSCALL(splice, 6, S_RET(rdec), S_NARG(dec, "fd_in"), S_NARG(ptr, "off_in"), S_NARG(dec, "fd_out"), S_NARG(ptr, "off_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(tee, 4, S_RET(rdec), S_NARG(dec, "fd_in"), S_NARG(dec, "fd_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(sync_file_range, 4, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_NARG(offset, "nbytes"), S_NARG(arg, "flags")), + SYSCALL(vmsplice, 4, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "nr_segs"), S_NARG(arg, "flags")), + SYSCALL(move_pages, 6, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "nr_pages"), S_NARG(ptr, "pages"), S_NARG(ptr, "nodes"), S_NARG(ptr, "status"), S_NARG(arg, "flags")), + SYSCALL(utimensat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "utimes"), S_NARG(arg, "flags")), + SYSCALL(epoll_pwait, 6, S_RET(rdec), S_NARG(fd, "epfd"), S_NARG(ptr, "events"), S_NARG(dec, "maxevents"), S_NARG(dec, "timeout"), S_NARG(ptr, "sigmask"), S_NARG(dec, "sigsetsize")), + SYSCALL(signalfd, 3, S_RET(rdec), S_NARG(dec, "ufd"), S_NARG(ptr, "user_mask"), S_NARG(dec, "sizemask")), + SYSCALL(timerfd_create, 2, S_RET(rdec), S_NARG(dec, "clockid"), S_NARG(arg, "flags")), + SYSCALL(eventfd, 1, S_RET(rdec), S_NARG(arg, "count")), + SYSCALL(fallocate, 4, S_RET(rdec), 
S_UARG(fd), S_NARG(octal_mode, "mode"), S_UARG(offset), S_NARG(offset, "length")), + SYSCALL(timerfd_settime, 4, S_RET(rdec), S_NARG(fd, "ufd"), S_NARG(arg, "flags"), S_NARG(ptr, "utmr"), S_NARG(ptr, "otmr")), + SYSCALL(timerfd_gettime, 2, S_RET(rdec), S_NARG(fd, "ufd"), S_NARG(ptr, "otmr")), + SYSCALL(accept4, 4, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen"), S_NARG(arg, "flags")), + SYSCALL(signalfd4, 4, S_RET(rdec), S_NARG(fd, "ufd"), S_NARG(ptr, "user_mask"), S_NARG(dec, "sizemask"), S_NARG(arg, "flags")), + SYSCALL(eventfd2, 2, S_RET(rdec), S_NARG(arg, "count"), S_NARG(arg, "flags")), + SYSCALL(epoll_create1, 1, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(dup3, 3, S_RET(rdec), S_NARG(fd, "oldfd"), S_NARG(fd, "newfd"), S_NARG(arg, "flags")), + SYSCALL(pipe2, 2, S_RET(rdec), S_NARG(ptr, "fildes"), S_NARG(arg, "flags")), + SYSCALL(inotify_init1, 1, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(preadv, 5, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h")), + SYSCALL(pwritev, 5, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h")), + SYSCALL(rt_tgsigqueueinfo, 4, S_RET(rdec), S_NARG(arg, "tgid"), S_NARG(arg, "pid"), S_NARG(signum, "sig"), S_NARG(ptr, "uinfo")), + SYSCALL(perf_event_open, 5, S_RET(rdec), S_NARG(ptr, "attr_uptr"), S_NARG(dec, "pid"), S_NARG(dec, "cpu"), S_NARG(fd, "group_fd"), S_NARG(arg, "flags")), + SYSCALL(recvmmsg, 5, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "msg"), S_NARG(dec, "vlen"), S_NARG(arg, "flags"), S_NARG(ptr, "timeout")), + SYSCALL(fanotify_init, 2, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(arg, "event_f_flags")), + SYSCALL(fanotify_mark, 5, S_RET(rdec), S_NARG(fd, "fanotify_fd"), S_NARG(arg, "flags"), S_NARG(arg, "mask"), S_UARG(fd), S_NARG(cstr, "pathname")), + SYSCALL(prlimit64, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "resource"), S_NARG(ptr, "new_rlim"), S_NARG(ptr, 
"old_rlim")), + SYSCALL(name_to_handle_at, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "handle"), S_NARG(ptr, "mnt_id"), S_NARG(arg, "flag")), + SYSCALL(open_by_handle_at, 3, S_RET(rdec), S_NARG(fd, "mountdirfd"), S_NARG(ptr, "handle"), S_NARG(arg, "flags")), + SYSCALL(clock_adjtime, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tx")), + SYSCALL(syncfs, 2, S_RET(rdec), S_UARG(fd)), + SYSCALL(sendmmsg, 4, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "msg"), S_NARG(dec, "vlen"), S_NARG(arg, "flags")), + SYSCALL(setns, 2, S_RET(rdec), S_UARG(fd), S_NARG(arg, "nstype")), + SYSCALL(getcpu, 3, S_RET(rdec), S_NARG(ptr, "cpu"), S_NARG(ptr, "node"), S_NARG(ptr, "cache")), + SYSCALL(process_vm_readv, 6, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "local_iov"), S_NARG(dec, "liovcnt"), S_NARG(ptr, "remote_iov"), S_NARG(dec, "riovcnt"), S_NARG(arg, "flags")), + SYSCALL(process_vm_writev, 6, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "local_iov"), S_NARG(dec, "liovcnt"), S_NARG(ptr, "remote_iov"), S_NARG(dec, "riovcnt"), S_NARG(arg, "flags")), + SYSCALL(kcmp, 5, S_RET(rdec), S_NARG(arg, "pid1"), S_NARG(arg, "pid2"), S_NARG(arg, "type"), S_NARG(arg, "idx1"), S_NARG(arg, "idx2")), + SYSCALL(finit_module, 3, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "param_values"), S_NARG(arg, "flags")), + SYSCALL(sched_setattr, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "attr"), S_NARG(arg, "flags")), + SYSCALL(sched_getattr, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "attr"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(renameat2, 5, S_RET(rdec), S_NARG(atfd, "olddfd"), S_NARG(cstr, "oldpath"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newpath"), S_NARG(arg, "flags")), + SYSCALL(seccomp, 3, S_RET(rdec), S_NARG(arg, "op"), S_NARG(arg, "flags"), S_NARG(ptr, "uargs")), + SYSCALL(getrandom, 3, S_RET(rdec), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_NARG(arg, "flags")), + SYSCALL(memfd_create, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, 
"flags")), + SYSCALL(kexec_file_load, 5, S_RET(rdec), S_NARG(fd, "kernel_fd"), S_NARG(fd, "initrd_fd"), S_NARG(arg, "cmdline_len"), S_NARG(cstr, "cmdline"), S_NARG(arg, "flags")), + +#ifdef SYS_bpf + SYSCALL(bpf, 2, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(ptr, "attr"), S_NARG(arg, "size")), +#endif // SYS_bpf + +#ifdef SYS_execveat + SYSCALL(execveat, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "argv"), S_NARG(ptr, "envp"), S_NARG(arg, "flags")), +#endif // SYS_execveat + + SYSCALL(userfaultfd, 2, S_RET(rdec), S_NARG(arg, "flags")), + +#ifdef SYS_membarrier + SYSCALL(membarrier, 2, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(arg, "flags")), +#endif // SYS_membarrier + +#ifdef SYS_mlock2 + SYSCALL(mlock2, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "flags")), +#endif // SYS_mlock2 + + SYSCALL(copy_file_range, 6, S_RET(rdec), S_NARG(fd, "fd_in"), S_NARG(ptr, "off_in"), S_NARG(fd, "fd_out"), S_NARG(ptr, "off_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + +#ifdef SYS_preadv2 + SYSCALL(preadv2, 6, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "vlen"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h"), S_NARG(arg, "flags")), +#endif // SYS_preadv2 + +#ifdef SYS_pwritev2 + SYSCALL(pwritev2, 6, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "vlen"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h"), S_NARG(arg, "flags")), +#endif // SYS_pwritev2 + + SYSCALL(pkey_mprotect, 4, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot"), S_NARG(dec, "pkey")), + SYSCALL(pkey_alloc, 2, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(arg, "init_val")), + SYSCALL(pkey_free, 1, S_RET(rdec), S_NARG(dec, "pkey")), + +#ifdef SYS_statx + SYSCALL(statx, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(arg, "flags"), S_NARG(arg, "mask"), S_NARG(ptr, "buffer")), +#endif // SYS_statx + +#ifdef SYS_io_pgetevents + SYSCALL(io_pgetevents, 6, S_RET(rdec), S_NARG(ptr, "ctx_id"), 
S_NARG(dec, "min_nr"), S_NARG(dec, "nr"), S_NARG(ptr, "events"), S_NARG(ptr, "timeout"), S_NARG(ptr, "sig")), +#endif // SYS_io_pgetevents + +#ifdef SYS_rseq + SYSCALL(rseq, 4, S_RET(rdec), S_NARG(ptr, "rseq"), S_NARG(dec, "rseq_len"), S_NARG(arg, "flags"), S_NARG(signum, "sig")) +#endif // SYS_rseq +}; + +/** Fallback descriptor returned by get_syscall_info() and get_syscall_info_by_name() when no table entry applies; it lists six 'arg'-type arguments named arg0..arg5. */ +static const struct syscall_info unknown_syscall = { + .s_name = "unknown_syscall", + .s_nargs = MAX_SYSCALL_ARGS, + .s_return_type = S_RET(rdec), + .s_args = { + S_NARG(arg, "arg0"), + S_NARG(arg, "arg1"), + S_NARG(arg, "arg2"), + S_NARG(arg, "arg3"), + S_NARG(arg, "arg4"), + S_NARG(arg, "arg5"), + } +}; + +/** Descriptor for open() that includes the third 'mode' argument; get_syscall_info() selects it when requires_mode_arg() accepts the flags. */ +static const struct syscall_info open_with_o_creat = { + .s_name = "open", + .s_nargs = 3, + .s_return_type = S_RET(rdec), + .s_args = { + S_NARG(cstr, "pathname"), + S_NARG(open_flags, "flags"), + S_NARG(octal_mode, "mode") + } +}; + +/** Descriptor for openat() that includes the fourth 'mode' argument; get_syscall_info() selects it when requires_mode_arg() accepts the flags. */ +static const struct syscall_info openat_with_o_creat = { + .s_name = "openat", + .s_nargs = 4, + .s_return_type = S_RET(rdec), + .s_args = { + S_NARG(atfd, "dfd"), + S_NARG(cstr, "pathname"), + S_NARG(open_flags, "flags"), + S_NARG(octal_mode, "mode") + } +}; + +/** requires_mode_arg - return true when 'flags' has every bit of O_CREAT set or, where O_TMPFILE is defined, every bit of O_TMPFILE set; return false otherwise. */ +static bool +requires_mode_arg(int flags) { + + if((flags & O_CREAT) == O_CREAT) { + return true; + } + +#ifdef O_TMPFILE + /* compared against the full O_TMPFILE value, not just tested for a non-zero overlap */ + if ((flags & O_TMPFILE) == O_TMPFILE) { + return true; + } +#endif + + return false; +} + +#include + +/** + * get_syscall_info - Return a syscall descriptor + * + * This function returns a pointer to a syscall_info structure that + * appropriately describes the system call identified by 'syscall_number'.
+ */ +const struct syscall_info * +get_syscall_info(const long syscall_number, + const long* argv) { + + /* numbers outside the bounds of syscall_table get the fallback descriptor */ + if(syscall_number < 0 || + syscall_number >= (long) ARRAY_SIZE(syscall_table)) { + return &unknown_syscall; + } + + /* so do table slots whose s_name is NULL */ + if(syscall_table[syscall_number].s_name == NULL) { + return &unknown_syscall; + } + + /* without an argument vector the flags cannot be inspected: use the table entry as is */ + if(argv == NULL) { + return &syscall_table[syscall_number]; + } + + /* open(): flags are read from argv[1]; switch to the descriptor that also lists 'mode' */ + if(syscall_number == SYS_open && requires_mode_arg(argv[1])) { + return &open_with_o_creat; + } + + /* openat(): same check, with flags read from argv[2] */ + if(syscall_number == SYS_openat && requires_mode_arg(argv[2])) { + return &openat_with_o_creat; + } + + return &syscall_table[syscall_number]; +} + +/** Pairs a syscall name with a pointer to its descriptor. */ +struct named_syscall_entry { + const char * s_name; + const struct syscall_info * s_info; +}; + +/** Initializer for a named_syscall_entry: the stringified 'id' and the address of syscall_table[SYS_id]. */ +#define SYSCALL_BY_NAME(id) \ +{ \ + .s_name = #id, \ + .s_info = &syscall_table[SYS_##id] \ +} + +/** Linux syscalls ordered by name; get_syscall_info_by_name() searches this array with bsearch() and strcmp(), so entries must stay in strcmp() order */ +const struct named_syscall_entry syscalls_by_name[] = { + SYSCALL_BY_NAME(_sysctl), + SYSCALL_BY_NAME(accept), + SYSCALL_BY_NAME(accept4), + SYSCALL_BY_NAME(access), + SYSCALL_BY_NAME(acct), + SYSCALL_BY_NAME(add_key), + SYSCALL_BY_NAME(adjtimex), + SYSCALL_BY_NAME(afs_syscall), + SYSCALL_BY_NAME(alarm), + SYSCALL_BY_NAME(arch_prctl), + SYSCALL_BY_NAME(bind), +#ifdef SYS_bpf + SYSCALL_BY_NAME(bpf), +#endif // SYS_bpf + SYSCALL_BY_NAME(brk), + SYSCALL_BY_NAME(capget), + SYSCALL_BY_NAME(capset), + SYSCALL_BY_NAME(chdir), + SYSCALL_BY_NAME(chmod), + SYSCALL_BY_NAME(chown), + SYSCALL_BY_NAME(chroot), + SYSCALL_BY_NAME(clock_adjtime), + SYSCALL_BY_NAME(clock_getres), + SYSCALL_BY_NAME(clock_gettime), + SYSCALL_BY_NAME(clock_nanosleep), + SYSCALL_BY_NAME(clock_settime), + SYSCALL_BY_NAME(clone), + SYSCALL_BY_NAME(close), + SYSCALL_BY_NAME(connect), + SYSCALL_BY_NAME(copy_file_range), + SYSCALL_BY_NAME(creat), + SYSCALL_BY_NAME(create_module), + SYSCALL_BY_NAME(delete_module), + SYSCALL_BY_NAME(dup), + SYSCALL_BY_NAME(dup2), + SYSCALL_BY_NAME(dup3), + SYSCALL_BY_NAME(epoll_create), + SYSCALL_BY_NAME(epoll_create1), +
SYSCALL_BY_NAME(epoll_ctl), + SYSCALL_BY_NAME(epoll_ctl_old), + SYSCALL_BY_NAME(epoll_pwait), + SYSCALL_BY_NAME(epoll_wait), + SYSCALL_BY_NAME(epoll_wait_old), + SYSCALL_BY_NAME(eventfd), + SYSCALL_BY_NAME(eventfd2), + SYSCALL_BY_NAME(execve), +#ifdef SYS_execveat + SYSCALL_BY_NAME(execveat), +#endif // SYS_execveat + SYSCALL_BY_NAME(exit), + SYSCALL_BY_NAME(exit_group), + SYSCALL_BY_NAME(faccessat), + SYSCALL_BY_NAME(fadvise64), + SYSCALL_BY_NAME(fallocate), + SYSCALL_BY_NAME(fanotify_init), + SYSCALL_BY_NAME(fanotify_mark), + SYSCALL_BY_NAME(fchdir), + SYSCALL_BY_NAME(fchmod), + SYSCALL_BY_NAME(fchmodat), + SYSCALL_BY_NAME(fchown), + SYSCALL_BY_NAME(fchownat), + SYSCALL_BY_NAME(fcntl), + SYSCALL_BY_NAME(fdatasync), + SYSCALL_BY_NAME(fgetxattr), + SYSCALL_BY_NAME(finit_module), + SYSCALL_BY_NAME(flistxattr), + SYSCALL_BY_NAME(flock), + SYSCALL_BY_NAME(fork), + SYSCALL_BY_NAME(fremovexattr), + SYSCALL_BY_NAME(fsetxattr), + SYSCALL_BY_NAME(fstat), + SYSCALL_BY_NAME(fstatfs), + SYSCALL_BY_NAME(fsync), + SYSCALL_BY_NAME(ftruncate), + SYSCALL_BY_NAME(futex), + SYSCALL_BY_NAME(futimesat), + SYSCALL_BY_NAME(get_kernel_syms), + SYSCALL_BY_NAME(get_mempolicy), + SYSCALL_BY_NAME(get_robust_list), + SYSCALL_BY_NAME(get_thread_area), + SYSCALL_BY_NAME(getcpu), + SYSCALL_BY_NAME(getcwd), + SYSCALL_BY_NAME(getdents), + SYSCALL_BY_NAME(getdents64), + SYSCALL_BY_NAME(getegid), + SYSCALL_BY_NAME(geteuid), + SYSCALL_BY_NAME(getgid), + SYSCALL_BY_NAME(getgroups), + SYSCALL_BY_NAME(getitimer), + SYSCALL_BY_NAME(getpeername), + SYSCALL_BY_NAME(getpgid), + SYSCALL_BY_NAME(getpgrp), + SYSCALL_BY_NAME(getpid), + SYSCALL_BY_NAME(getpmsg), + SYSCALL_BY_NAME(getppid), + SYSCALL_BY_NAME(getpriority), + SYSCALL_BY_NAME(getrandom), + SYSCALL_BY_NAME(getresgid), + SYSCALL_BY_NAME(getresuid), + SYSCALL_BY_NAME(getrlimit), + SYSCALL_BY_NAME(getrusage), + SYSCALL_BY_NAME(getsid), + SYSCALL_BY_NAME(getsockname), + SYSCALL_BY_NAME(getsockopt), + SYSCALL_BY_NAME(gettid), + 
SYSCALL_BY_NAME(gettimeofday), + SYSCALL_BY_NAME(getuid), + SYSCALL_BY_NAME(getxattr), + SYSCALL_BY_NAME(init_module), + SYSCALL_BY_NAME(inotify_add_watch), + SYSCALL_BY_NAME(inotify_init), + SYSCALL_BY_NAME(inotify_init1), + SYSCALL_BY_NAME(inotify_rm_watch), + SYSCALL_BY_NAME(io_cancel), + SYSCALL_BY_NAME(io_destroy), + SYSCALL_BY_NAME(io_getevents), +#ifdef SYS_io_pgetevents + SYSCALL_BY_NAME(io_pgetevents), +#endif // SYS_io_pgetevents + SYSCALL_BY_NAME(io_setup), + SYSCALL_BY_NAME(io_submit), + SYSCALL_BY_NAME(ioctl), + SYSCALL_BY_NAME(ioperm), + SYSCALL_BY_NAME(iopl), + SYSCALL_BY_NAME(ioprio_get), + SYSCALL_BY_NAME(ioprio_set), + SYSCALL_BY_NAME(kcmp), + SYSCALL_BY_NAME(kexec_file_load), + SYSCALL_BY_NAME(kexec_load), + SYSCALL_BY_NAME(keyctl), + SYSCALL_BY_NAME(kill), + SYSCALL_BY_NAME(lchown), + SYSCALL_BY_NAME(lgetxattr), + SYSCALL_BY_NAME(link), + SYSCALL_BY_NAME(linkat), + SYSCALL_BY_NAME(listen), + SYSCALL_BY_NAME(listxattr), + SYSCALL_BY_NAME(llistxattr), + SYSCALL_BY_NAME(lookup_dcookie), + SYSCALL_BY_NAME(lremovexattr), + SYSCALL_BY_NAME(lseek), + SYSCALL_BY_NAME(lsetxattr), + SYSCALL_BY_NAME(lstat), + SYSCALL_BY_NAME(madvise), + SYSCALL_BY_NAME(mbind), +#ifdef SYS_membarrier + SYSCALL_BY_NAME(membarrier), +#endif // SYS_membarrier + SYSCALL_BY_NAME(memfd_create), + SYSCALL_BY_NAME(migrate_pages), + SYSCALL_BY_NAME(mincore), + SYSCALL_BY_NAME(mkdir), + SYSCALL_BY_NAME(mkdirat), + SYSCALL_BY_NAME(mknod), + SYSCALL_BY_NAME(mknodat), + SYSCALL_BY_NAME(mlock), +#ifdef SYS_mlock2 + SYSCALL_BY_NAME(mlock2), +#endif // SYS_mlock2 + SYSCALL_BY_NAME(mlockall), + SYSCALL_BY_NAME(mmap), + SYSCALL_BY_NAME(modify_ldt), + SYSCALL_BY_NAME(mount), + SYSCALL_BY_NAME(move_pages), + SYSCALL_BY_NAME(mprotect), + SYSCALL_BY_NAME(mq_getsetattr), + SYSCALL_BY_NAME(mq_notify), + SYSCALL_BY_NAME(mq_open), + SYSCALL_BY_NAME(mq_timedreceive), + SYSCALL_BY_NAME(mq_timedsend), + SYSCALL_BY_NAME(mq_unlink), + SYSCALL_BY_NAME(mremap), + SYSCALL_BY_NAME(msgctl), + 
SYSCALL_BY_NAME(msgget), + SYSCALL_BY_NAME(msgrcv), + SYSCALL_BY_NAME(msgsnd), + SYSCALL_BY_NAME(msync), + SYSCALL_BY_NAME(munlock), + SYSCALL_BY_NAME(munlockall), + SYSCALL_BY_NAME(munmap), + SYSCALL_BY_NAME(name_to_handle_at), + SYSCALL_BY_NAME(nanosleep), + SYSCALL_BY_NAME(newfstatat), + SYSCALL_BY_NAME(nfsservctl), + SYSCALL_BY_NAME(open), + SYSCALL_BY_NAME(open_by_handle_at), + SYSCALL_BY_NAME(openat), + SYSCALL_BY_NAME(pause), + SYSCALL_BY_NAME(perf_event_open), + SYSCALL_BY_NAME(personality), + SYSCALL_BY_NAME(pipe), + SYSCALL_BY_NAME(pipe2), + SYSCALL_BY_NAME(pivot_root), + /* pkey_* entries belong here, in strcmp() order: get_syscall_info_by_name() searches this array with bsearch(), which needs it sorted */ + SYSCALL_BY_NAME(pkey_alloc), + SYSCALL_BY_NAME(pkey_free), + SYSCALL_BY_NAME(pkey_mprotect), + SYSCALL_BY_NAME(poll), + SYSCALL_BY_NAME(ppoll), + SYSCALL_BY_NAME(prctl), + SYSCALL_BY_NAME(pread64), + SYSCALL_BY_NAME(preadv), +#ifdef SYS_preadv2 + SYSCALL_BY_NAME(preadv2), +#endif // SYS_preadv2 + SYSCALL_BY_NAME(prlimit64), + SYSCALL_BY_NAME(process_vm_readv), + SYSCALL_BY_NAME(process_vm_writev), + SYSCALL_BY_NAME(pselect6), + SYSCALL_BY_NAME(ptrace), + SYSCALL_BY_NAME(putpmsg), + SYSCALL_BY_NAME(pwrite64), + SYSCALL_BY_NAME(pwritev), +#ifdef SYS_pwritev2 + SYSCALL_BY_NAME(pwritev2), +#endif // SYS_pwritev2 + SYSCALL_BY_NAME(query_module), + SYSCALL_BY_NAME(quotactl), + SYSCALL_BY_NAME(read), + SYSCALL_BY_NAME(readahead), + SYSCALL_BY_NAME(readlink), + SYSCALL_BY_NAME(readlinkat), + SYSCALL_BY_NAME(readv), + SYSCALL_BY_NAME(reboot), + SYSCALL_BY_NAME(recvfrom), + SYSCALL_BY_NAME(recvmmsg), + SYSCALL_BY_NAME(recvmsg), + SYSCALL_BY_NAME(remap_file_pages), + SYSCALL_BY_NAME(removexattr), + SYSCALL_BY_NAME(rename), + SYSCALL_BY_NAME(renameat), + SYSCALL_BY_NAME(renameat2), + SYSCALL_BY_NAME(request_key), + SYSCALL_BY_NAME(restart_syscall), + SYSCALL_BY_NAME(rmdir), +#ifdef SYS_rseq + SYSCALL_BY_NAME(rseq), +#endif // SYS_rseq + SYSCALL_BY_NAME(rt_sigaction), + SYSCALL_BY_NAME(rt_sigpending), + SYSCALL_BY_NAME(rt_sigprocmask), + SYSCALL_BY_NAME(rt_sigqueueinfo), +
SYSCALL_BY_NAME(rt_sigreturn), + SYSCALL_BY_NAME(rt_sigsuspend), + SYSCALL_BY_NAME(rt_sigtimedwait), + SYSCALL_BY_NAME(rt_tgsigqueueinfo), + SYSCALL_BY_NAME(sched_get_priority_max), + SYSCALL_BY_NAME(sched_get_priority_min), + SYSCALL_BY_NAME(sched_getaffinity), + SYSCALL_BY_NAME(sched_getattr), + SYSCALL_BY_NAME(sched_getparam), + SYSCALL_BY_NAME(sched_getscheduler), + SYSCALL_BY_NAME(sched_rr_get_interval), + SYSCALL_BY_NAME(sched_setaffinity), + SYSCALL_BY_NAME(sched_setattr), + SYSCALL_BY_NAME(sched_setparam), + SYSCALL_BY_NAME(sched_setscheduler), + SYSCALL_BY_NAME(sched_yield), + SYSCALL_BY_NAME(seccomp), + SYSCALL_BY_NAME(security), + SYSCALL_BY_NAME(select), + SYSCALL_BY_NAME(semctl), + SYSCALL_BY_NAME(semget), + SYSCALL_BY_NAME(semop), + SYSCALL_BY_NAME(semtimedop), + SYSCALL_BY_NAME(sendfile), + SYSCALL_BY_NAME(sendmmsg), + SYSCALL_BY_NAME(sendmsg), + SYSCALL_BY_NAME(sendto), + SYSCALL_BY_NAME(set_mempolicy), + SYSCALL_BY_NAME(set_robust_list), + SYSCALL_BY_NAME(set_thread_area), + SYSCALL_BY_NAME(set_tid_address), + SYSCALL_BY_NAME(setdomainname), + SYSCALL_BY_NAME(setfsgid), + SYSCALL_BY_NAME(setfsuid), + SYSCALL_BY_NAME(setgid), + SYSCALL_BY_NAME(setgroups), + SYSCALL_BY_NAME(sethostname), + SYSCALL_BY_NAME(setitimer), + SYSCALL_BY_NAME(setns), + SYSCALL_BY_NAME(setpgid), + SYSCALL_BY_NAME(setpriority), + SYSCALL_BY_NAME(setregid), + SYSCALL_BY_NAME(setresgid), + SYSCALL_BY_NAME(setresuid), + SYSCALL_BY_NAME(setreuid), + SYSCALL_BY_NAME(setrlimit), + SYSCALL_BY_NAME(setsid), + SYSCALL_BY_NAME(setsockopt), + SYSCALL_BY_NAME(settimeofday), + SYSCALL_BY_NAME(setuid), + SYSCALL_BY_NAME(setxattr), + SYSCALL_BY_NAME(shmat), + SYSCALL_BY_NAME(shmctl), + SYSCALL_BY_NAME(shmdt), + SYSCALL_BY_NAME(shmget), + SYSCALL_BY_NAME(shutdown), + SYSCALL_BY_NAME(sigaltstack), + SYSCALL_BY_NAME(signalfd), + SYSCALL_BY_NAME(signalfd4), + SYSCALL_BY_NAME(socket), + SYSCALL_BY_NAME(socketpair), + SYSCALL_BY_NAME(splice), + SYSCALL_BY_NAME(stat), + SYSCALL_BY_NAME(statfs), 
+#ifdef SYS_statx + SYSCALL_BY_NAME(statx), +#endif // SYS_statx + SYSCALL_BY_NAME(swapoff), + SYSCALL_BY_NAME(swapon), + SYSCALL_BY_NAME(symlink), + SYSCALL_BY_NAME(symlinkat), + SYSCALL_BY_NAME(sync), + SYSCALL_BY_NAME(sync_file_range), + SYSCALL_BY_NAME(syncfs), + SYSCALL_BY_NAME(sysfs), + SYSCALL_BY_NAME(sysinfo), + SYSCALL_BY_NAME(syslog), + SYSCALL_BY_NAME(tee), + SYSCALL_BY_NAME(tgkill), + SYSCALL_BY_NAME(time), + SYSCALL_BY_NAME(timer_create), + SYSCALL_BY_NAME(timer_delete), + SYSCALL_BY_NAME(timer_getoverrun), + SYSCALL_BY_NAME(timer_gettime), + SYSCALL_BY_NAME(timer_settime), + SYSCALL_BY_NAME(timerfd_create), + SYSCALL_BY_NAME(timerfd_gettime), + SYSCALL_BY_NAME(timerfd_settime), + SYSCALL_BY_NAME(times), + SYSCALL_BY_NAME(tkill), + SYSCALL_BY_NAME(truncate), + SYSCALL_BY_NAME(tuxcall), + SYSCALL_BY_NAME(umask), + SYSCALL_BY_NAME(umount2), + SYSCALL_BY_NAME(uname), + SYSCALL_BY_NAME(unlink), + SYSCALL_BY_NAME(unlinkat), + SYSCALL_BY_NAME(unshare), + SYSCALL_BY_NAME(uselib), + SYSCALL_BY_NAME(userfaultfd), + SYSCALL_BY_NAME(ustat), + SYSCALL_BY_NAME(utime), + SYSCALL_BY_NAME(utimensat), + SYSCALL_BY_NAME(utimes), + SYSCALL_BY_NAME(vfork), + SYSCALL_BY_NAME(vhangup), + SYSCALL_BY_NAME(vmsplice), + SYSCALL_BY_NAME(vserver), + SYSCALL_BY_NAME(wait4), + SYSCALL_BY_NAME(waitid), + SYSCALL_BY_NAME(write), + SYSCALL_BY_NAME(writev), +}; + /* bsearch() comparator: `k` is the C string being looked up, `e` points at a struct named_syscall_entry; the two are ordered by strcmp() against the entry's s_name. */ +static int +compare_named_entries(const void *k, const void *e) { + const char* name = (const char*) k; + struct named_syscall_entry* entry = (struct named_syscall_entry*) e; + return strcmp(name, entry->s_name); +} + /* Look up a syscall descriptor by name. Binary-searches syscalls_by_name with compare_named_entries(), so that table has to be kept in strcmp() order. Returns the matching entry's s_info, or &unknown_syscall when no entry matches. */ +const struct syscall_info * +get_syscall_info_by_name(const char* syscall_name) { + + struct named_syscall_entry* res = + bsearch(syscall_name, &syscalls_by_name[0], ARRAY_SIZE(syscalls_by_name), + sizeof(struct named_syscall_entry), compare_named_entries); + /* bsearch() yields NULL when the name is not in the table */ + if(res == NULL) { + return &unknown_syscall; + } + + return res->s_info; +} + /* Shorthand for the return-type tag (s_return_type.r_type) of a syscall descriptor pointer. */ +#define RETURN_TYPE(scinfo) \ + (scinfo)->s_return_type.r_type + /* True when the descriptor obtained from get_syscall_info() for `syscall_number` has return type `rnone`. */ +bool 
+syscall_never_returns(long syscall_number) { + return RETURN_TYPE(get_syscall_info(syscall_number, NULL)) == rnone; +} + + + /* Done with the table-building macros (SYSCALL_BY_NAME, ARRAY_SIZE and, presumably, the SYSCALL / S_* descriptor helpers defined earlier in this file): undefine them so they do not leak any further. */ +#undef SYSCALL +#undef S_NOARGS +#undef S_UARG +#undef S_NARG +#undef S_RET +#undef SYSCALL_BY_NAME +#undef ARRAY_SIZE diff --git a/src/daemon/handler/h_metadentry.cpp b/src/daemon/handler/h_metadentry.cpp index c30e2327fc18490fc5e1596251dda1e7333bc0ed..fad762fbbdc68d69996f0f180d34f08cfcd3460e 100644 --- a/src/daemon/handler/h_metadentry.cpp +++ b/src/daemon/handler/h_metadentry.cpp @@ -22,8 +22,8 @@ using namespace std; static hg_return_t rpc_srv_mk_node(hg_handle_t handle) { - rpc_mk_node_in_t in{}; - rpc_err_out_t out{}; + rpc_mk_node_in_t in; + rpc_err_out_t out; auto ret = margo_get_input(handle, &in); if (ret != HG_SUCCESS) @@ -132,7 +132,7 @@ static hg_return_t rpc_srv_rm_node(hg_handle_t handle) { if (ret != HG_SUCCESS) ADAFS_DATA->spdlogger()->error("{}() Failed to retrieve input from handle", __func__); assert(ret == HG_SUCCESS); - ADAFS_DATA->spdlogger()->debug("Got remove node RPC with path {}", in.path); + ADAFS_DATA->spdlogger()->debug("Got remove node RPC with path '{}'", in.path); try { // Remove metadentry if exists on the node diff --git a/src/daemon/main.cpp b/src/daemon/main.cpp index 39a2362a4a8cbd6df5bd1f9281ce28bfa397f468..45410e469c384bd0ea4666cb710f64f4e058a658 100644 --- a/src/daemon/main.cpp +++ b/src/daemon/main.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -262,7 +263,7 @@ void shutdown_handler(int dummy) { void initialize_loggers() { std::string path = DEFAULT_DAEMON_LOG_PATH; // Try to get log path from env variable - std::string env_path_key = ENV_PREFIX; + std::string env_path_key = DAEMON_ENV_PREFIX; env_path_key += "DAEMON_LOG_PATH"; char* env_path = getenv(env_path_key.c_str()); if (env_path != nullptr) { @@ -271,7 +272,7 @@ void initialize_loggers() { spdlog::level::level_enum level = get_spdlog_level(DEFAULT_DAEMON_LOG_LEVEL); // Try to get log path from env 
variable - std::string env_level_key = ENV_PREFIX; + std::string env_level_key = DAEMON_ENV_PREFIX; env_level_key += "LOG_LEVEL"; char* env_level = getenv(env_level_key.c_str()); if (env_level != nullptr) { @@ -350,11 +351,8 @@ int main(int argc, const char* argv[]) { if (vm.count("hosts-file")) { hosts_file = vm["hosts-file"].as(); } else { - try { - hosts_file = gkfs::get_env_own("HOSTS_FILE"); - } catch (const exception& e) { - hosts_file = DEFAULT_HOSTS_FILE; - } + hosts_file = + gkfs::env::get_var(gkfs::env::HOSTS_FILE, DEFAULT_HOSTS_FILE); } ADAFS_DATA->hosts_file(hosts_file); diff --git a/src/global/env_util.cpp b/src/global/env_util.cpp index efcd38e4410b7fffd2c627d2d53a9cf5cdc5f6b8..60078c6eb073addcbd3e2fb540b8b9447ba9827e 100644 --- a/src/global/env_util.cpp +++ b/src/global/env_util.cpp @@ -11,27 +11,20 @@ SPDX-License-Identifier: MIT */ -#include -#include +#include #include -#include - +#include namespace gkfs { +namespace env { -using namespace std; +std::string +get_var(const std::string& name, + const std::string& default_value) { -string get_env(const string& env_name) { - char* env_value = getenv(env_name.c_str()); - if (env_value == nullptr) { - throw runtime_error("Environment variable not set: " + env_name); - } - return env_value; + const char* const val = ::secure_getenv(name.c_str()); /* get_var(): yields the value of environment variable `name`, or `default_value` when secure_getenv() returns nullptr (variable unset or, per glibc, the process runs in secure-execution mode); unlike the removed get_env() it never throws for a missing variable */ + return val != nullptr ? std::string(val) : default_value; } -string get_env_own(const string& env_name) { - string env_key = ENV_PREFIX + env_name; - return get_env(env_key); -} - -} +} // namespace env +} // namespace gkfs