Loading CMakeLists.txt +19 −0 Original line number Diff line number Diff line Loading @@ -112,6 +112,25 @@ if(SYMLINK_SUPPORT) endif() message(STATUS "Symlink support: ${SYMLINK_SUPPORT}") option(USE_BITSET_FOR_INTERNAL_FDS "Use std::bitset to track internal fds" ON) if(USE_BITSET_FOR_INTERNAL_FDS) add_definitions(-DUSE_BITSET_FOR_INTERNAL_FDS) execute_process(COMMAND getconf OPEN_MAX OUTPUT_VARIABLE GETCONF_MAX_FDS OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET) if(NOT GETCONF_MAX_FDS) set(GETCONF_MAX_FDS=512) endif() add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS}) endif() message(STATUS "Use std::bitset for internal fd tracking: ${USE_BITSET_FOR_INTERNAL_FDS}") if(USE_BITSET_FOR_INTERNAL_FDS) message(STATUS "Max open files: ${GETCONF_MAX_FDS}") endif() configure_file(include/global/configure.hpp.in include/global/configure.hpp) # Imported target Loading include/client/preload_context.hpp +15 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,10 @@ #include <vector> #include <string> #ifdef USE_BITSET_FOR_INTERNAL_FDS #include <bitset> #endif // USE_BITSET_FOR_INTERNAL_FDS /* Forward declarations */ class OpenFileMap; class Distributor; Loading Loading @@ -67,6 +71,13 @@ class PreloadContext { bool interception_enabled_; #ifdef USE_BITSET_FOR_INTERNAL_FDS std::bitset<MAX_OPEN_FDS> internal_fds_; #else std::set<int> internal_fds_; #endif // USE_BITSET_FOR_INTERNAL_FDS public: static PreloadContext* getInstance() { static PreloadContext instance; Loading Loading @@ -109,6 +120,10 @@ class PreloadContext { void enable_interception(); void disable_interception(); bool interception_enabled() const; void register_internal_fd(int fd); void unregister_internal_fd(int fd); bool is_internal_fd(int fd) const; }; Loading src/client/hooks.cpp +7 −0 Original line number Diff line number Diff line Loading @@ -61,6 +61,13 @@ int hook_close(int fd) { CTX->file_map()->remove(fd); return 0; } if(CTX->is_internal_fd(fd)) { // the client application (for some 
reason) is trying to close an // internal fd: ignore it return 0; } return syscall_no_intercept(SYS_close, fd); } Loading src/client/intercept.cpp +513 −268 Original line number Diff line number Diff line Loading @@ -27,8 +27,230 @@ #define NOT_HOOKED 1 #define HOOKED 0 #if 0 static void log_write(const char *fmt, ...) { int log_fd = 2; if (log_fd < 0) return; char buf[0x1000]; int len; va_list ap; va_start(ap, fmt); len = vsnprintf(buf, sizeof(buf) - 1, fmt, ap); va_end(ap); if (len < 1) return; buf[len++] = '\n'; syscall_no_intercept(SYS_write, log_fd, buf, len); } #endif static inline int hook(long syscall_number, static __thread bool reentrance_guard_flag; /* * hook_internal -- interception hook for internal syscalls * * This hook is basically used to keep track of file descriptors created * internally by the library itself. This is important because some * applications (e.g. ssh) may attempt to close all open file descriptors * which would leave the library internals in an inconsistent state. * We forward syscalls to the kernel but we keep track of any syscalls that may * create or destroy a file descriptor so that we can mark them as 'internal'. 
*/ static inline int hook_internal(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *result) { switch (syscall_number) { case SYS_open: *result = syscall_no_intercept(syscall_number, reinterpret_cast<char*>(arg0), static_cast<int>(arg1), static_cast<mode_t>(arg2)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_creat: *result = syscall_no_intercept(syscall_number, reinterpret_cast<const char*>(arg0), O_WRONLY | O_CREAT | O_TRUNC, static_cast<mode_t>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_openat: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), reinterpret_cast<const char*>(arg1), static_cast<int>(arg2), static_cast<mode_t>(arg3)); if(*result != -1) { CTX->register_internal_fd(*result); } break; // epoll_create and epoll_create1 have the same prototype case SYS_epoll_create: case SYS_epoll_create1: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_dup: *result = syscall_no_intercept(syscall_number, static_cast<unsigned int>(arg0)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_dup2: *result = syscall_no_intercept(syscall_number, static_cast<unsigned int>(arg0), static_cast<unsigned int>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_dup3: *result = syscall_no_intercept(syscall_number, static_cast<unsigned int>(arg0), static_cast<unsigned int>(arg1), static_cast<int>(arg2)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_inotify_init: *result = syscall_no_intercept(syscall_number); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_inotify_init1: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_perf_event_open: *result = 
syscall_no_intercept(syscall_number, reinterpret_cast<struct perf_event_attr*>(arg0), static_cast<pid_t>(arg1), static_cast<int>(arg2), static_cast<int>(arg3), static_cast<unsigned long>(arg4)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_signalfd: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), reinterpret_cast<const sigset_t*>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_signalfd4: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), reinterpret_cast<const sigset_t*>(arg1), static_cast<int>(arg2)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_timerfd_create: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), static_cast<int>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_close: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0)); CTX->unregister_internal_fd(*result); break; default: /* * Ignore any other syscalls * i.e.: pass them on to the kernel * as would normally happen. */ #ifndef NDEBUG CTX->log()->trace("Syscall [{}, {}] Passthrough", syscall_names[syscall_number], syscall_number); #endif return NOT_HOOKED; } #ifndef NDEBUG CTX->log()->trace("Syscall [{}, {}] Intercepted", syscall_names[syscall_number], syscall_number); #endif #if 0 log_write("Internal syscall [%s, %d] = %d", syscall_names[syscall_number]);//, syscall_number, *result); #endif return HOOKED; } /* * hook -- interception hook for application syscalls * * This hook is used to implement any application filesystem-related syscalls. */ static inline int hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *result) Loading Loading @@ -316,8 +538,20 @@ static inline int hook(long syscall_number, } static __thread bool guard_flag; /* * hook_guard_wrapper -- a wrapper which can notice reentrance. 
* * The reentrance_guard_flag flag allows the library to distinguish the hooking * of its own syscalls. E.g. while handling an open() syscall, * libgkfs_intercept might call fopen(), which in turn uses an open() * syscall internally. This internally used open() syscall is once again * forwarded to libgkfs_intercept, but using this flag we can notice this * case of reentering itself. * * XXX This approach still contains a very significant bug, as libgkfs_intercept * being called inside a signal handler might easily forward a mock fd to the * kernel. */ int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, Loading @@ -326,19 +560,30 @@ hook_guard_wrapper(long syscall_number, { assert(CTX->interception_enabled()); if (guard_flag) { return NOT_HOOKED; } #if 0 log_write("syscall %s called from %s", syscall_names[syscall_number], reentrance_guard_flag ? "gkfs" : "client"); #endif int is_hooked; guard_flag = true; if (reentrance_guard_flag) { int oerrno = errno; is_hooked = hook_internal(syscall_number, arg0, arg1, arg2, arg3, arg4, arg5, syscall_return_value); errno = oerrno; return is_hooked; } reentrance_guard_flag = true; int oerrno = errno; is_hooked = hook(syscall_number, arg0, arg1, arg2, arg3, arg4, arg5, syscall_return_value); errno = oerrno; guard_flag = false; reentrance_guard_flag = false; return is_hooked; } Loading src/client/preload.cpp +53 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,9 @@ #include <client/rpc/hg_rpcs.hpp> #include <hermes.hpp> #include <sys/types.h> #include <dirent.h> #include <fstream> Loading Loading @@ -104,6 +107,33 @@ bool init_hermes_client(const std::string& transport_prefix) { return true; } static inline std::set<int> query_open_fds() { std::set<int> fds; const std::string path{"/proc/self/fd"}; std::unique_ptr<DIR, decltype(&::closedir)> dirp( ::opendir(path.c_str()), closedir); struct dirent entry; struct dirent *result; while (::readdir_r(dirp.get(), &entry, &result) == 0 && 
result != NULL) { const std::string name{entry.d_name}; if(name == "." || name == ".." || std::stoi(name) == dirfd(dirp.get())) { continue; } fds.insert(std::stoi(name)); } return fds; } /** * This function is only called in the preload constructor and initializes Loading @@ -111,6 +141,17 @@ bool init_hermes_client(const std::string& transport_prefix) { */ void init_ld_environment_() { // Client applications such as ssh attempt to close all open file // descriptors, which causes havoc with the interception library's internal // state. To account for this, in the interception code we keep track of // internal fds by distinguishing between internal syscalls (i.e. those // coming from internal code) application syscalls. The problem is that // at this point in initialization we have not enabled interception yet, // but the initialization process itself needs to create file descriptors. // To solve this problem, we find out which fds are created by the // initialization process and manually protect them at this point auto pre_init_fds = query_open_fds(); // initialize Hermes interface to Mercury if (!init_hermes_client(RPC_PROTOCOL)) { exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client"); Loading @@ -130,6 +171,17 @@ void init_ld_environment_() { exit_error_msg(EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC."); } auto post_init_fds = query_open_fds(); std::set<int> internal_fds{3}; // fd 3 is created by the logging system std::set_difference(post_init_fds.begin(), post_init_fds.end(), pre_init_fds.begin(), pre_init_fds.end(), std::inserter(internal_fds, internal_fds.end())); for(const auto& fd : internal_fds) { CTX->register_internal_fd(fd); } CTX->log()->info("{}() Environment initialization successful.", __func__); } Loading Loading @@ -178,6 +230,7 @@ void log_prog_name() { * Called initially ONCE when preload library is used with the LD_PRELOAD environment variable */ void init_preload() { 
init_logging(); CTX->log()->debug("Initialized logging subsystem"); log_prog_name(); Loading Loading
CMakeLists.txt +19 −0
Original line number Diff line number Diff line
Loading
@@ -112,6 +112,25 @@ if(SYMLINK_SUPPORT)
endif()
message(STATUS "Symlink support: ${SYMLINK_SUPPORT}")

# Internal file descriptors are tracked either in a fixed-size std::bitset
# (option ON) or in a std::set<int> (option OFF).
option(USE_BITSET_FOR_INTERNAL_FDS "Use std::bitset to track internal fds" ON)
if(USE_BITSET_FOR_INTERNAL_FDS)
    add_definitions(-DUSE_BITSET_FOR_INTERNAL_FDS)
    # The bitset size must be a compile-time constant: ask the build host for
    # its per-process open-file limit.
    execute_process(COMMAND getconf OPEN_MAX
                    OUTPUT_VARIABLE GETCONF_MAX_FDS
                    OUTPUT_STRIP_TRAILING_WHITESPACE
                    ERROR_QUIET)
    # Fall back to 512 when getconf is unavailable or prints anything other
    # than a positive integer. The two-argument form of set() is required:
    # set(GETCONF_MAX_FDS=512) never assigns GETCONF_MAX_FDS, which would leave
    # MAX_OPEN_FDS expanding to nothing.
    if(NOT GETCONF_MAX_FDS MATCHES "^[1-9][0-9]*$")
        set(GETCONF_MAX_FDS 512)
    endif()
    add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS})
endif()
message(STATUS "Use std::bitset for internal fd tracking: ${USE_BITSET_FOR_INTERNAL_FDS}")
if(USE_BITSET_FOR_INTERNAL_FDS)
    message(STATUS "Max open files: ${GETCONF_MAX_FDS}")
endif()

configure_file(include/global/configure.hpp.in include/global/configure.hpp)

# Imported target
Loading
include/client/preload_context.hpp +15 −0 Original line number Diff line number Diff line Loading @@ -22,6 +22,10 @@ #include <vector> #include <string> #ifdef USE_BITSET_FOR_INTERNAL_FDS #include <bitset> #endif // USE_BITSET_FOR_INTERNAL_FDS /* Forward declarations */ class OpenFileMap; class Distributor; Loading Loading @@ -67,6 +71,13 @@ class PreloadContext { bool interception_enabled_; #ifdef USE_BITSET_FOR_INTERNAL_FDS std::bitset<MAX_OPEN_FDS> internal_fds_; #else std::set<int> internal_fds_; #endif // USE_BITSET_FOR_INTERNAL_FDS public: static PreloadContext* getInstance() { static PreloadContext instance; Loading Loading @@ -109,6 +120,10 @@ class PreloadContext { void enable_interception(); void disable_interception(); bool interception_enabled() const; void register_internal_fd(int fd); void unregister_internal_fd(int fd); bool is_internal_fd(int fd) const; }; Loading
src/client/hooks.cpp +7 −0
Original line number Diff line number Diff line
Loading
@@ -61,6 +61,13 @@ int hook_close(int fd) {
        CTX->file_map()->remove(fd);
        return 0;
    }

    // Descriptors flagged as internal are never closed on behalf of the
    // application: success is reported and the descriptor stays open.
    if(CTX->is_internal_fd(fd)) {
        // the client application (for some reason) is trying to close an
        // internal fd: ignore it
        return 0;
    }

    // Any other descriptor is closed by the kernel as usual.
    return syscall_no_intercept(SYS_close, fd);
}
Loading
src/client/intercept.cpp +513 −268 Original line number Diff line number Diff line Loading @@ -27,8 +27,230 @@ #define NOT_HOOKED 1 #define HOOKED 0 #if 0 static void log_write(const char *fmt, ...) { int log_fd = 2; if (log_fd < 0) return; char buf[0x1000]; int len; va_list ap; va_start(ap, fmt); len = vsnprintf(buf, sizeof(buf) - 1, fmt, ap); va_end(ap); if (len < 1) return; buf[len++] = '\n'; syscall_no_intercept(SYS_write, log_fd, buf, len); } #endif static inline int hook(long syscall_number, static __thread bool reentrance_guard_flag; /* * hook_internal -- interception hook for internal syscalls * * This hook is basically used to keep track of file descriptors created * internally by the library itself. This is important because some * applications (e.g. ssh) may attempt to close all open file descriptors * which would leave the library internals in an inconsistent state. * We forward syscalls to the kernel but we keep track of any syscalls that may * create or destroy a file descriptor so that we can mark them as 'internal'. 
*/ static inline int hook_internal(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *result) { switch (syscall_number) { case SYS_open: *result = syscall_no_intercept(syscall_number, reinterpret_cast<char*>(arg0), static_cast<int>(arg1), static_cast<mode_t>(arg2)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_creat: *result = syscall_no_intercept(syscall_number, reinterpret_cast<const char*>(arg0), O_WRONLY | O_CREAT | O_TRUNC, static_cast<mode_t>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_openat: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), reinterpret_cast<const char*>(arg1), static_cast<int>(arg2), static_cast<mode_t>(arg3)); if(*result != -1) { CTX->register_internal_fd(*result); } break; // epoll_create and epoll_create1 have the same prototype case SYS_epoll_create: case SYS_epoll_create1: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_dup: *result = syscall_no_intercept(syscall_number, static_cast<unsigned int>(arg0)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_dup2: *result = syscall_no_intercept(syscall_number, static_cast<unsigned int>(arg0), static_cast<unsigned int>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_dup3: *result = syscall_no_intercept(syscall_number, static_cast<unsigned int>(arg0), static_cast<unsigned int>(arg1), static_cast<int>(arg2)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_inotify_init: *result = syscall_no_intercept(syscall_number); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_inotify_init1: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_perf_event_open: *result = 
syscall_no_intercept(syscall_number, reinterpret_cast<struct perf_event_attr*>(arg0), static_cast<pid_t>(arg1), static_cast<int>(arg2), static_cast<int>(arg3), static_cast<unsigned long>(arg4)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_signalfd: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), reinterpret_cast<const sigset_t*>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_signalfd4: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), reinterpret_cast<const sigset_t*>(arg1), static_cast<int>(arg2)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_timerfd_create: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0), static_cast<int>(arg1)); if(*result != -1) { CTX->register_internal_fd(*result); } break; case SYS_close: *result = syscall_no_intercept(syscall_number, static_cast<int>(arg0)); CTX->unregister_internal_fd(*result); break; default: /* * Ignore any other syscalls * i.e.: pass them on to the kernel * as would normally happen. */ #ifndef NDEBUG CTX->log()->trace("Syscall [{}, {}] Passthrough", syscall_names[syscall_number], syscall_number); #endif return NOT_HOOKED; } #ifndef NDEBUG CTX->log()->trace("Syscall [{}, {}] Intercepted", syscall_names[syscall_number], syscall_number); #endif #if 0 log_write("Internal syscall [%s, %d] = %d", syscall_names[syscall_number]);//, syscall_number, *result); #endif return HOOKED; } /* * hook -- interception hook for application syscalls * * This hook is used to implement any application filesystem-related syscalls. */ static inline int hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *result) Loading Loading @@ -316,8 +538,20 @@ static inline int hook(long syscall_number, } static __thread bool guard_flag; /* * hook_guard_wrapper -- a wrapper which can notice reentrance. 
* * The reentrance_guard_flag flag allows the library to distinguish the hooking * of its own syscalls. E.g. while handling an open() syscall, * libgkfs_intercept might call fopen(), which in turn uses an open() * syscall internally. This internally used open() syscall is once again * forwarded to libgkfs_intercept, but using this flag we can notice this * case of reentering itself. * * XXX This approach still contains a very significant bug, as libgkfs_intercept * being called inside a signal handler might easily forward a mock fd to the * kernel. */ int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, Loading @@ -326,19 +560,30 @@ hook_guard_wrapper(long syscall_number, { assert(CTX->interception_enabled()); if (guard_flag) { return NOT_HOOKED; } #if 0 log_write("syscall %s called from %s", syscall_names[syscall_number], reentrance_guard_flag ? "gkfs" : "client"); #endif int is_hooked; guard_flag = true; if (reentrance_guard_flag) { int oerrno = errno; is_hooked = hook_internal(syscall_number, arg0, arg1, arg2, arg3, arg4, arg5, syscall_return_value); errno = oerrno; return is_hooked; } reentrance_guard_flag = true; int oerrno = errno; is_hooked = hook(syscall_number, arg0, arg1, arg2, arg3, arg4, arg5, syscall_return_value); errno = oerrno; guard_flag = false; reentrance_guard_flag = false; return is_hooked; } Loading
src/client/preload.cpp +53 −0 Original line number Diff line number Diff line Loading @@ -26,6 +26,9 @@ #include <client/rpc/hg_rpcs.hpp> #include <hermes.hpp> #include <sys/types.h> #include <dirent.h> #include <fstream> Loading Loading @@ -104,6 +107,33 @@ bool init_hermes_client(const std::string& transport_prefix) { return true; } static inline std::set<int> query_open_fds() { std::set<int> fds; const std::string path{"/proc/self/fd"}; std::unique_ptr<DIR, decltype(&::closedir)> dirp( ::opendir(path.c_str()), closedir); struct dirent entry; struct dirent *result; while (::readdir_r(dirp.get(), &entry, &result) == 0 && result != NULL) { const std::string name{entry.d_name}; if(name == "." || name == ".." || std::stoi(name) == dirfd(dirp.get())) { continue; } fds.insert(std::stoi(name)); } return fds; } /** * This function is only called in the preload constructor and initializes Loading @@ -111,6 +141,17 @@ bool init_hermes_client(const std::string& transport_prefix) { */ void init_ld_environment_() { // Client applications such as ssh attempt to close all open file // descriptors, which causes havoc with the interception library's internal // state. To account for this, in the interception code we keep track of // internal fds by distinguishing between internal syscalls (i.e. those // coming from internal code) application syscalls. The problem is that // at this point in initialization we have not enabled interception yet, // but the initialization process itself needs to create file descriptors. 
// To solve this problem, we find out which fds are created by the // initialization process and manually protect them at this point auto pre_init_fds = query_open_fds(); // initialize Hermes interface to Mercury if (!init_hermes_client(RPC_PROTOCOL)) { exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client"); Loading @@ -130,6 +171,17 @@ void init_ld_environment_() { exit_error_msg(EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC."); } auto post_init_fds = query_open_fds(); std::set<int> internal_fds{3}; // fd 3 is created by the logging system std::set_difference(post_init_fds.begin(), post_init_fds.end(), pre_init_fds.begin(), pre_init_fds.end(), std::inserter(internal_fds, internal_fds.end())); for(const auto& fd : internal_fds) { CTX->register_internal_fd(fd); } CTX->log()->info("{}() Environment initialization successful.", __func__); } Loading Loading @@ -178,6 +230,7 @@ void log_prog_name() { * Called initially ONCE when preload library is used with the LD_PRELOAD environment variable */ void init_preload() { init_logging(); CTX->log()->debug("Initialized logging subsystem"); log_prog_name(); Loading