From 8ce7ef50af1e32b8a13061dbd82c7b37d06a4f9b Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 19 Mar 2025 21:21:02 +0100 Subject: [PATCH] Implement fd protection mechanism and update documentation --- CHANGELOG.md | 4 ++- README.md | 4 +++ include/client/env.hpp | 1 + include/client/preload_context.hpp | 7 +++++ src/client/open_file_map.cpp | 43 +++++++++++++++++++----------- src/client/preload.cpp | 22 +++++++++++---- src/client/preload_context.cpp | 14 +++++++++- 7 files changed, 72 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62eb43983..99beaab1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### New - Added cppcheck code checking capabilities ([!214](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/214)) - Tests to cover proxy and malleability ([!222](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/222)) - + - New fd generation method ([!225](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/202)) + - Use LIBGKFS_PROTECT_FD=1 to enable the original method of assignation and protection. + ### Changed - Tests check ret for -1 instead of 10000 fd ([!320](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/320)) - Allow some more script tests to run as pthread_at_fork solved some issues. diff --git a/README.md b/README.md index 989b52547..03d89091e 100644 --- a/README.md +++ b/README.md @@ -585,6 +585,10 @@ until the file is closed. The cache does not impact the consistency of the file with the corresponding daemon (default: 1000). The file size is further synchronized when the file is `close()`d or when `fsync()` is called. +##### Protecting FDs +When the user creates a fd, this is protected from normal fds with a recolocation. This theoretically protects the fd from being closed from outside. However a new fd assignation system has been developed and is activated by default. +- `LIBGKFS_PROTECT_FD=1` - Enable the original method of assignation and protection. + ### Daemon #### Logging - `GKFS_DAEMON_LOG_PATH` - Path to the log file of the daemon. diff --git a/include/client/env.hpp b/include/client/env.hpp index 3dabfb8ef..ec400aba4 100644 --- a/include/client/env.hpp +++ b/include/client/env.hpp @@ -68,6 +68,7 @@ static constexpr auto METRICS_PATH = ADD_PREFIX("METRICS_PATH"); static constexpr auto METRICS_IP_PORT = ADD_PREFIX("METRICS_IP_PORT"); #endif +static constexpr auto PROTECT_FD = ADD_PREFIX("PROTECT_FD"); static constexpr auto NUM_REPL = ADD_PREFIX("NUM_REPL"); static constexpr auto PROXY_PID_FILE = ADD_PREFIX("PROXY_PID_FILE"); namespace cache { diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 877cce2ac..b0662c063 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -138,6 +138,7 @@ private: std::bitset protected_fds_; std::string hostname; int replicas_; + bool protect_fds_{false}; std::shared_ptr write_metrics_; std::shared_ptr read_metrics_; @@ -314,6 +315,12 @@ public: int get_replicas(); + bool + protect_fds() const; + + void + protect_fds(bool protect); + const std::shared_ptr write_metrics(); diff --git a/src/client/open_file_map.cpp b/src/client/open_file_map.cpp index a42d6fc16..1a5193a45 100644 --- a/src/client/open_file_map.cpp +++ b/src/client/open_file_map.cpp @@ -142,22 +142,29 @@ OpenFileMap::exist(const int fd) { int OpenFileMap::safe_generate_fd_idx_() { - auto fd = generate_fd_idx(); - /* - * Check if fd is still in use and generate another if yes - * Note that this can only happen once the all fd indices within the int has - * been used to the int::max Once this limit is exceeded, we set fd_idx back - * to 3 and begin anew. Only then, if a file was open for a long time will - * we have to generate another index. - * - * This situation can only occur when all fd indices have been given away - * once and we start again, in which case the fd_validation_needed flag is - * set. fd_validation is set to false, if - */ - if(fd_validation_needed) { - while(exist(fd)) { - fd = generate_fd_idx(); + int fd = 0; + if(CTX->protect_fds()) { + fd = generate_fd_idx(); + /* + * Check if fd is still in use and generate another if yes + * Note that this can only happen once the all fd indices within the int + * has been used to the int::max Once this limit is exceeded, we set + * fd_idx back to 3 and begin anew. Only then, if a file was open for a + * long time will we have to generate another index. + * + * This situation can only occur when all fd indices have been given + * away once and we start again, in which case the fd_validation_needed + * flag is set. fd_validation is set to false, if + */ + if(fd_validation_needed) { + while(exist(fd)) { + fd = generate_fd_idx(); + } } + } else { + // Some architectures do not support SYS_open + fd = syscall_no_intercept(SYS_openat, AT_FDCWD, "/dev/null", O_RDWR, + S_IRUSR | S_IWUSR); } return fd; } @@ -178,6 +185,10 @@ OpenFileMap::remove(const int fd) { return false; } files_.erase(fd); + if(!CTX->protect_fds()) { + // We close the dev null fd + close(fd); + } if(fd_validation_needed && files_.empty()) { fd_validation_needed = false; LOG(DEBUG, "fd_validation flag reset"); @@ -237,7 +248,7 @@ OpenFileMap::generate_fd_idx() { * which tells can tell the OpenFileMap that it should check if this fd * is really safe to use. */ - fd_idx = 100000; + fd_idx = 10000; fd_validation_needed = true; } return fd_idx++; diff --git a/src/client/preload.cpp b/src/client/preload.cpp index ead12ede1..7f22669e1 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -359,13 +359,14 @@ init_preload() { // The original errno value will be restored after initialization to not // leak internal error codes auto oerrno = errno; + + CTX->enable_interception(); + gkfs::preload::start_self_interception(); + if(!init) { init = true; pthread_atfork(&at_fork_syscall, &at_parent_syscall, &at_child_syscall); } - CTX->enable_interception(); - gkfs::preload::start_self_interception(); - CTX->init_logging(); // from here ownwards it is safe to print messages LOG(DEBUG, "Logging subsystem initialized"); @@ -380,7 +381,17 @@ init_preload() { // To prevent this for our internal // initialization code, we forcefully occupy the user fd range to force // such modules to create fds in our private range. - CTX->protect_user_fds(); + if(gkfs::env::var_is_set(gkfs::env::PROTECT_FD)) { + CTX->protect_fds(true); + LOG(INFO, "Protecting user fds"); + } else { + CTX->protect_fds(false); + LOG(INFO, "Not protecting user fds"); + } + + if(CTX->protect_fds()) { + CTX->protect_user_fds(); + } log_prog_name(); gkfs::path::init_cwd(); @@ -390,7 +401,8 @@ init_preload() { gkfs::preload::init_environment(); CTX->enable_interception(); - CTX->unprotect_user_fds(); + if(CTX->protect_fds()) + CTX->unprotect_user_fds(); auto forwarding_map_file = gkfs::env::get_var( gkfs::env::FORWARDING_MAP_FILE, gkfs::config::forwarding_file_path); diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index abb506c4b..6e71ec80d 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -487,11 +487,21 @@ PreloadContext::interception_enabled() const { return interception_enabled_; } +bool +PreloadContext::protect_fds() const { + return protect_fds_; +} + +void +PreloadContext::protect_fds(bool protect) { + protect_fds_ = protect; +} int PreloadContext::register_internal_fd(int fd) { assert(fd >= 0); - + if(!protect_fds()) + return fd; if(!internal_fds_must_relocate_) { LOG(DEBUG, "registering fd {} as internal (no relocation needed)", fd); assert(fd >= MIN_INTERNAL_FD); @@ -559,6 +569,8 @@ PreloadContext::register_internal_fd(int fd) { void PreloadContext::unregister_internal_fd(int fd) { + if(!protect_fds()) + return; LOG(DEBUG, "unregistering internal fd {} >= {} -> {}'", fd, MIN_INTERNAL_FD, fd >= MIN_INTERNAL_FD); -- GitLab