From 3a6ed443b66adc70d235acaf84ae8fa8dd4e1ba3 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Mon, 12 May 2025 15:38:02 +0200 Subject: [PATCH] basic mmap support --- CHANGELOG.md | 1 + include/client/gkfs_functions.hpp | 10 ++++++ include/client/hooks.hpp | 8 +++++ include/client/user_functions.hpp | 10 ++++++ src/client/gkfs_functions.cpp | 60 +++++++++++++++++++++++++++++++ src/client/hooks.cpp | 38 ++++++++++++++++++++ src/client/intercept.cpp | 16 +++++++++ 7 files changed, 143 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c733dd493..8fda6e2c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Use LIBGKFS_PROTECT_FD=1 to enable the original method of assignation and protection. - Lock system (server level) ([!245](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/245)) - Use PROTECT_FILES_GENERATOR=1 and PROTECT_FILES_CONSUMER=1 to enable. Generator, creates transparent .lockgekko files that blocks the open (for some seconds) of any consumer. Multiple opens / closes for generator are managed. 
+ - Basic mmap support ([!247](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/247)) ### Changed - Tests check ret for -1 instead of 10000 fd ([!320](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/320)) diff --git a/include/client/gkfs_functions.hpp b/include/client/gkfs_functions.hpp index 73a89ba60..36a585139 100644 --- a/include/client/gkfs_functions.hpp +++ b/include/client/gkfs_functions.hpp @@ -182,6 +182,16 @@ int gkfs_rename(const std::string& old_path, const std::string& new_path); #endif // HAS_RENAME +// gkfs_mmap +void* +gkfs_mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset); + +int +gkfs_munmap(void* addr, size_t length); + +int +gkfs_msync(void* addr, size_t length, int flags); + } // namespace gkfs::syscall // gkfs_getsingleserverdir is using extern "C" to demangle it for C usage diff --git a/include/client/hooks.hpp b/include/client/hooks.hpp index e40b581fd..0c921d452 100644 --- a/include/client/hooks.hpp +++ b/include/client/hooks.hpp @@ -233,6 +233,14 @@ hook_fallocate(int fd, int mode, off_t offset, off_t len); int hook_fadvise64(int fd, off_t offset, off_t len, int advice); +void* +hook_mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset); + +int +hook_munmap(void* addr, size_t length); + +int +hook_msync(void* addr, size_t length, int flags); } // namespace gkfs::hook #endif diff --git a/include/client/user_functions.hpp b/include/client/user_functions.hpp index d861c22e2..0312df198 100644 --- a/include/client/user_functions.hpp +++ b/include/client/user_functions.hpp @@ -133,6 +133,16 @@ gkfs_rename(const std::string& old_path, const std::string& new_path); int gkfs_fsync(unsigned int fd); +// add mmap +void* +gkfs_mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset); + +int +gkfs_munmap(void* addr, size_t length); + +int +gkfs_msync(void* addr, size_t length, int flags); + } // namespace syscall namespace malleable { diff --git 
a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp
index 06ecedb27..2dd5a1e2d 100644
--- a/src/client/gkfs_functions.cpp
+++ b/src/client/gkfs_functions.cpp
@@ -114,6 +114,16 @@ struct dirent_extended {
 
 namespace {
 
+/**
+ * One emulated mapping created by gkfs_mmap(): buffer address, file
+ * descriptor, mapped length, file offset and whether the buffer must be
+ * written back to the file (shared, writable mappings only).
+ */
+using mmap_entry = std::tuple<void*, int, size_t, off_t, bool>;
+
+// emulated mappings that are currently alive
+std::set<mmap_entry> mmap_set;
+
 /**
  * Checks if metadata for parent directory exists (can be disabled with
  * GKFS_CREATE_CHECK_PARENTS). errno may be set
@@ -1930,6 +1940,129 @@ gkfs_get_file_list(const std::string& path) {
     return file_list;
 }
 
+namespace {
+
+/**
+ * Finds the emulated mapping whose buffer starts at addr.
+ *
+ * @param addr start address handed out by gkfs_mmap()
+ * @return iterator into mmap_set, mmap_set.end() if addr is not tracked
+ */
+std::set<mmap_entry>::const_iterator
+find_mapping(const void* addr) {
+    return std::find_if(mmap_set.begin(), mmap_set.end(),
+                        [addr](const mmap_entry& entry) {
+                            return std::get<0>(entry) == addr;
+                        });
+}
+
+/**
+ * Writes the first length bytes of a mapping's buffer back to its file at the
+ * offset the mapping was created with. length is clamped to the mapped
+ * length so the buffer is never read past its end. Mappings that are private
+ * or not writable are skipped.
+ *
+ * NOTE(review): the gkfs_pwrite() result is dropped because gkfs_msync() and
+ * gkfs_munmap() reserve -1 for "not a GekkoFS mapping".
+ */
+void
+flush_mapping(const mmap_entry& entry, size_t length) {
+    if(!std::get<4>(entry)) {
+        return;
+    }
+    const size_t flush_len = std::min(length, std::get<2>(entry));
+    gkfs::syscall::gkfs_pwrite(std::get<1>(entry), std::get<0>(entry),
+                               flush_len, std::get<3>(entry));
+}
+
+} // namespace
+
+/**
+ * Emulates mmap(2) for a GekkoFS file: the requested file range is read into
+ * a heap buffer that stays tracked in mmap_set until gkfs_munmap() is called.
+ *
+ * Limitations: addr is ignored, the buffer has no page-alignment guarantee
+ * and modifications reach the file only through gkfs_msync()/gkfs_munmap().
+ *
+ * @param addr placement hint, ignored
+ * @param length number of bytes to map
+ * @param prot PROT_* flags; write-back requires PROT_WRITE
+ * @param flags MAP_* flags; write-back requires MAP_SHARED
+ * @param fd GekkoFS file descriptor to read from
+ * @param offset file offset at which the mapping starts
+ * @return start of the buffer, or MAP_FAILED with errno set
+ */
+void*
+gkfs_mmap(void* addr, size_t length, int prot, int flags, int fd,
+          off_t offset) {
+    // mmap(2) rejects empty mappings; an allocator may return a unique
+    // non-null pointer for size 0, which would then be tracked as a mapping
+    if(length == 0) {
+        errno = EINVAL;
+        return MAP_FAILED;
+    }
+    // zero-filled so a short read does not expose stale heap contents
+    void* ptr = calloc(1, length);
+    if(ptr == nullptr) {
+        errno = ENOMEM;
+        return MAP_FAILED;
+    }
+    // NOTE(review): assumes gkfs_pread() follows pread(2) and signals failure
+    // with a negative return value and errno set - confirm
+    if(gkfs::syscall::gkfs_pread(fd, ptr, length, offset) < 0) {
+        const int read_errno = errno;
+        free(ptr);
+        errno = read_errno;
+        return MAP_FAILED;
+    }
+    const bool write_back =
+            (flags & MAP_SHARED) != 0 && (prot & PROT_WRITE) != 0;
+    mmap_set.emplace(ptr, fd, length, offset, write_back);
+    return ptr;
+}
+
+/**
+ * Emulates msync(2) for a buffer returned by gkfs_mmap(). Only the start
+ * address of a mapping is recognised.
+ *
+ * @param addr start address of the mapping
+ * @param length number of bytes to write back, clamped to the mapped length
+ * @param flags MS_* flags, ignored
+ * @return 0 if addr is a GekkoFS mapping, -1 otherwise
+ */
+int
+gkfs_msync(void* addr, size_t length, int flags) {
+    const auto it = find_mapping(addr);
+    if(it == mmap_set.end()) {
+        return -1;
+    }
+    flush_mapping(*it, length);
+    return 0;
+}
+
+/**
+ * Emulates munmap(2) for a buffer returned by gkfs_mmap(): writes the buffer
+ * back, frees it and drops it from mmap_set.
+ *
+ * Partial unmapping is not supported: the whole buffer is written back and
+ * freed whatever length is passed.
+ *
+ * @param addr start address of the mapping
+ * @param length ignored
+ * @return 0 if addr was a GekkoFS mapping, -1 otherwise
+ */
+int
+gkfs_munmap(void* addr, size_t length) {
+    const auto it = find_mapping(addr);
+    if(it == mmap_set.end()) {
+        return -1;
+    }
+    flush_mapping(*it, std::get<2>(*it));
+    free(addr);
+    mmap_set.erase(it);
+    return 0;
+}
+
 } // namespace gkfs::syscall
 
 
diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp
index 7f5befa06..7429f4f05 100644
--- a/src/client/hooks.cpp
+++
b/src/client/hooks.cpp
@@ -1098,4 +1098,57 @@ hook_fadvise64(int fd, off_t offset, off_t len, int advice) {
     }
     return syscall_no_intercept_wrapper(SYS_fadvise64, fd, offset, len, advice);
 }
+
+/**
+ * mmap(2) hook: descriptors present in the GekkoFS file map are served by
+ * gkfs::syscall::gkfs_mmap(), any other request is forwarded through
+ * syscall_no_intercept_wrapper().
+ *
+ * NOTE(review): flags are not inspected, so a MAP_ANONYMOUS request is routed
+ * by its fd as well - confirm callers pass fd -1 for anonymous mappings.
+ */
+void*
+hook_mmap(void* addr, size_t length, int prot, int flags, int fd,
+          off_t offset) {
+    LOG(DEBUG,
+        "{}() called with addr '{}' length '{}' prot '{}' flags '{}' fd '{}' offset '{}'",
+        __func__, fmt::ptr(addr), length, prot, flags, fd, offset);
+
+    if(!CTX->file_map()->exist(fd)) {
+        // descriptor is not in the GekkoFS file map
+        return reinterpret_cast<void*>(syscall_no_intercept_wrapper(
+                SYS_mmap, addr, length, prot, flags, fd, offset));
+    }
+    return gkfs::syscall::gkfs_mmap(addr, length, prot, flags, fd, offset);
+}
+
+/**
+ * munmap(2) hook: gkfs::syscall::gkfs_munmap() gets the first try; unless it
+ * returns 0 the request is forwarded through syscall_no_intercept_wrapper().
+ */
+int
+hook_munmap(void* addr, size_t length) {
+    LOG(DEBUG, "{}() called with addr '{}' length '{}'", __func__,
+        fmt::ptr(addr), length);
+
+    if(gkfs::syscall::gkfs_munmap(addr, length) == 0) {
+        return 0;
+    }
+    return syscall_no_intercept_wrapper(SYS_munmap, addr, length);
+}
+
+/**
+ * msync(2) hook: gkfs::syscall::gkfs_msync() gets the first try; unless it
+ * returns 0 the request is forwarded through syscall_no_intercept_wrapper().
+ */
+int
+hook_msync(void* addr, size_t length, int flags) {
+    LOG(DEBUG, "{}() called with addr '{}' length '{}' flags '{}'", __func__,
+        fmt::ptr(addr), length, flags);
+
+    if(gkfs::syscall::gkfs_msync(addr, length, flags) == 0) {
+        return 0;
+    }
+    return syscall_no_intercept_wrapper(SYS_msync, addr, length, flags);
+}
+
 } // namespace gkfs::hook
diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 5dd803a6d..aab1284d6 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -864,6 +864,22 @@ hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, static_cast(arg0), static_cast(arg1), static_cast(arg2), static_cast(arg4)); break; + // Intercept mmap, msync and munmap + case SYS_mmap: + *result = reinterpret_cast(gkfs::hook::hook_mmap( + reinterpret_cast(arg0), static_cast(arg1), + static_cast(arg2), static_cast(arg3), + static_cast(arg4), static_cast(arg5))); + break; + case SYS_msync: + *result = 
gkfs::hook::hook_msync(reinterpret_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; + case SYS_munmap: + *result = gkfs::hook::hook_munmap(reinterpret_cast(arg0), + static_cast(arg1)); + break; default: // ignore any other syscalls, i.e.: pass them on to the kernel -- GitLab