From bc023a9ab189ef9818563fb0af12614793593318 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Wed, 21 May 2025 16:03:40 +0200 Subject: [PATCH 1/2] java syscall solved --- CHANGELOG.md | 3 + include/client/path.hpp | 2 + include/client/syscalls/args.hpp | 92 +++++++++++++++++++ include/client/syscalls/detail/syscall_info.h | 1 + include/config.hpp | 4 +- src/client/hooks.cpp | 4 +- src/client/path.cpp | 1 - src/client/preload_context.cpp | 17 ++++ src/client/syscalls/detail/syscall_info.c | 2 +- 9 files changed, 120 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a792df8a1..40ab79790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - gkfs_do_write uses int instead of ssize_t causing overflow ([!229](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/229)) - proxy remove metadata has inverted return values ([!237](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/237)) - Rename and symlink support leveraged ([!246](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/246)) + - Java with syscalls deadlocks as it tries to resolve paths (malloc) in a locking situation ([!255](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/255)) + - It also solves the missing flock implementation when we are outside gekkofs + - Some features in syscall_intercept still hang if we do not lower the debug information.
## [0.9.4] - 2025-03 ### New diff --git a/include/client/path.hpp b/include/client/path.hpp index 87acc327a..7bdc840e9 100644 --- a/include/client/path.hpp +++ b/include/client/path.hpp @@ -42,6 +42,8 @@ namespace gkfs::path { +static const std::string excluded_paths[2] = {"sys/", "proc/"}; + unsigned int match_components(const std::string& path, unsigned int& path_components, const std::vector& components); diff --git a/include/client/syscalls/args.hpp b/include/client/syscalls/args.hpp index ea391ba97..c833f82dd 100644 --- a/include/client/syscalls/args.hpp +++ b/include/client/syscalls/args.hpp @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +77,7 @@ enum class type { mmap_prot = ::arg_type_t::mmap_prot, mmap_flags = ::arg_type_t::mmap_flags, clone_flags = ::arg_type_t::clone_flags, + clone3_args = ::arg_type_t::clone3_args, signum = ::arg_type_t::signum, sigproc_how = ::arg_type_t::sigproc_how, generic = ::arg_type_t::arg, @@ -96,6 +98,7 @@ static constexpr auto whence = type::whence; static constexpr auto mmap_prot = type::mmap_prot; static constexpr auto mmap_flags = type::mmap_flags; static constexpr auto clone_flags = type::clone_flags; +static constexpr auto clone3_args = type::clone3_args; static constexpr auto signum = type::signum; static constexpr auto sigproc_how = type::sigproc_how; static constexpr auto generic = type::generic; @@ -154,6 +157,9 @@ format_mmap_flags_arg_to(FmtBuffer& buffer, const printable_arg& parg); template inline void format_clone_flags_arg_to(FmtBuffer& buffer, const printable_arg& parg); +template inline void +format_clone3_args_arg_to(FmtBuffer& buffer, const printable_arg& parg); + template inline void format_signum_arg_to(FmtBuffer& buffer, const printable_arg& parg); @@ -182,6 +188,7 @@ std::array, arg_type_max> formatters = { /* [mmap_prot] = */ format_mmap_prot_arg_to, /* [mmap_flags] = */ format_mmap_flags_arg_to, /* [clone_flags] = */ format_clone_flags_arg_to, + /* 
[clone3_args] = */ format_clone3_args_arg_to, /* [signum] = */ format_signum_arg_to, /* [sigproc_how] = */ format_sigproc_how_arg_to, /* [arg] = */ format_arg_to, @@ -444,6 +451,91 @@ format_clone_flags_arg_to(FmtBuffer& buffer, const printable_arg& parg) { return; } +/** + * format_clone3_args_arg_to - format a 'args' argument + * + * Format a 'args' argument (such as those passed to clone3()) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_clone3_args_arg_to(FmtBuffer& buffer, const printable_arg& parg) { + + // struct clone_args { + // u64 flags; /* Flags bit mask */ + // u64 pidfd; /* Where to store PID file descriptor + // (int *) */ + // u64 child_tid; /* Where to store child TID, + // in child's memory (pid_t *) */ + // u64 parent_tid; /* Where to store child TID, + // in parent's memory (pid_t *) */ + // u64 exit_signal; /* Signal to deliver to parent on + // child termination */ + // u64 stack; /* Pointer to lowest byte of stack */ + // u64 stack_size; /* Size of stack */ + // u64 tls; /* Location of new TLS */ + // u64 set_tid; /* Pointer to a pid_t array + // (since Linux 5.5) */ + // u64 set_tid_size; /* Number of elements in set_tid + // (since Linux 5.5) */ + // u64 cgroup; /* File descriptor for target cgroup + // of child (since Linux 5.7) */ + // }; + + + struct clone_args* ca = reinterpret_cast(parg.value); + /* Names for clone3() args arg */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(CLONE_VM), + FLAG_ENTRY(CLONE_FS), + FLAG_ENTRY(CLONE_FILES), + FLAG_ENTRY(CLONE_SIGHAND), + FLAG_ENTRY(CLONE_PTRACE), + FLAG_ENTRY(CLONE_VFORK), + FLAG_ENTRY(CLONE_PARENT), + FLAG_ENTRY(CLONE_THREAD), + FLAG_ENTRY(CLONE_NEWNS), + FLAG_ENTRY(CLONE_SYSVSEM), + FLAG_ENTRY(CLONE_SETTLS), + FLAG_ENTRY(CLONE_PARENT_SETTID), + FLAG_ENTRY(CLONE_CHILD_CLEARTID), + FLAG_ENTRY(CLONE_DETACHED), + FLAG_ENTRY(CLONE_UNTRACED), + FLAG_ENTRY(CLONE_CHILD_SETTID), +#ifdef CLONE_NEWCGROUP + FLAG_ENTRY(CLONE_NEWCGROUP), 
+#endif + FLAG_ENTRY(CLONE_NEWUTS), + FLAG_ENTRY(CLONE_NEWIPC), + FLAG_ENTRY(CLONE_NEWUSER), + FLAG_ENTRY(CLONE_NEWPID), + FLAG_ENTRY(CLONE_NEWNET), + FLAG_ENTRY(CLONE_IO)); + + fmt::format_to(std::back_inserter(buffer), "{}=", "flags"); + format_flag_set(buffer, ca->flags, flag_names); + + fmt::format_to(std::back_inserter(buffer), "|", "signal"); + format_signum_arg_to(buffer, {"", ca->exit_signal}); + + fmt::format_to(std::back_inserter(buffer), ",{}={}", "pidfd", (void*)ca->pidfd); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "child_tid", (void*)ca->child_tid); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "parent_tid", (void*)ca->parent_tid); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "stack", (void*)ca->stack); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "stack_size", ca->stack_size); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "tls", (void*)ca->tls); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "set_tid", (void*)ca->set_tid); + // set_tid size and cgroup + fmt::format_to(std::back_inserter(buffer), ",{}={}", "set_tid_size", ca->set_tid_size); + fmt::format_to(std::back_inserter(buffer), ",{}={}", "cgroup", ca->cgroup); + + return; + + +} + + /** * format_signum_arg_to - format a 'signum' argument * diff --git a/include/client/syscalls/detail/syscall_info.h b/include/client/syscalls/detail/syscall_info.h index cca13cd82..b15b54da3 100644 --- a/include/client/syscalls/detail/syscall_info.h +++ b/include/client/syscalls/detail/syscall_info.h @@ -64,6 +64,7 @@ typedef enum { mmap_prot, /* protections for the mmap() family of syscalls */ mmap_flags, /* flags for the mmap() family of syscalls */ clone_flags, /* flags for the clone() syscall */ + clone3_args, /* args for the clone3() syscall */ signum, /* signal numbers */ sigproc_how, /* sigprocmask argument */ arg, /* generic argument, no special formatting */ diff --git a/include/config.hpp b/include/config.hpp index c1ba919ca..0c7544b9a 100644 --- 
a/include/config.hpp +++ b/include/config.hpp @@ -114,8 +114,8 @@ constexpr auto dir = "metadata"; // which metadata should be considered apart from size and mode // Blocks are used to store the rename status (-1 is a renamed file) constexpr auto use_atime = false; -constexpr auto use_ctime = false; -constexpr auto use_mtime = false; +constexpr auto use_ctime = true; +constexpr auto use_mtime = true; constexpr auto use_link_cnt = false; #ifdef HAS_RENAME constexpr auto use_blocks = true; diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index f1b7b89d0..06bf0bddc 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -435,9 +435,9 @@ hook_flock(unsigned long fd, int flags) { __func__, fd, flags); if(CTX->file_map()->exist(fd)) { - return 0; + return -ENOTSUP; } else - return -EBADF; + return syscall_no_intercept_wrapper(SYS_flock, fd, flags); } #ifdef SYS_access diff --git a/src/client/path.cpp b/src/client/path.cpp index 6ccb561c7..1792a0e70 100644 --- a/src/client/path.cpp +++ b/src/client/path.cpp @@ -62,7 +62,6 @@ using namespace std; namespace gkfs::path { -static const string excluded_paths[2] = {"sys/", "proc/"}; /** Match components in path * diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 465ed926f..ca5f9479c 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -301,6 +301,21 @@ PreloadContext::relativize_fd_path(int dirfd, const char* raw_path, // We assume raw path is valid assert(raw_path != nullptr); + // Skip paths that are used in locking situations (e.g. java + // /proc/sys/vm/overcommit_memory) and would produce deadlocks as we call malloc + // inside.
+ if(dirfd == AT_FDCWD) { + for(auto& excl_path : gkfs::path::excluded_paths) { + // compare raw_path with excl_path + if(raw_path != nullptr && raw_path[0] == gkfs::path::separator) { + if(strncmp(raw_path + 1, excl_path.c_str(), + excl_path.length()) == 0) { + return RelativizeStatus::external; + } + } + } + } + std::string path; @@ -582,6 +597,8 @@ PreloadContext::unregister_internal_fd(int fd) { bool PreloadContext::is_internal_fd(int fd) const { + if(!protect_fds()) + return false; if(fd < MIN_INTERNAL_FD) { return false; diff --git a/src/client/syscalls/detail/syscall_info.c b/src/client/syscalls/detail/syscall_info.c index 89d849928..c4c9ba259 100644 --- a/src/client/syscalls/detail/syscall_info.c +++ b/src/client/syscalls/detail/syscall_info.c @@ -642,7 +642,7 @@ SYSCALL(getpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), SYSCALL(pidfd_open, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "flags")), #endif #ifdef SYS_clone3 - SYSCALL(clone3, 4, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(ptr, "child_tid"), S_NARG(ptr, "parent_tid"), S_NARG(ptr, "tls")), + SYSCALL(clone3, 4, S_RET(rdec), S_NARG(clone3_args, "flags"), S_NARG(arg, "size")), #endif #ifdef SYS_close_range SYSCALL(close_range, 3, S_RET(rdec), S_NARG(dec, "low"), S_NARG(dec, "high"), S_NARG(arg, "flags")), -- GitLab From 8f51bf56ea019c60632da6096e092fd1171ba576 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 22 May 2025 07:09:10 +0200 Subject: [PATCH 2/2] flock return 0 is needed for wacomm --- src/client/hooks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 06bf0bddc..c97630fd7 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -435,7 +435,7 @@ hook_flock(unsigned long fd, int flags) { __func__, fd, flags); if(CTX->file_map()->exist(fd)) { - return -ENOTSUP; + return 0; } else return syscall_no_intercept_wrapper(SYS_flock, fd, flags); } -- GitLab