diff --git a/CHANGELOG.md b/CHANGELOG.md index e566843c0e45c76a5d74e40905e8cc97bd005a31..319429f07602ec83710cbf7798bb1b36a7d5dde7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### New ### Changed - Unify dependency scripts (dl and compile): Unify `-d` and `-p` flags ([!174](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/174)). +- Support for the `close_range` syscall ([!201](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/201)). + - Removed the `O_PATH` check in `gkfs_open`, which allows `cp` with an asterisk. This is for newer kernels. + - Added support for printing newer syscalls (although they are not implemented). The syscall number is now added to the log to make missing ones easier to catch. ### Removed ### Fixed diff --git a/include/client/syscalls/decoder.hpp b/include/client/syscalls/decoder.hpp index 3172cc2df291c118dcf3de9c13cfa19f7177b10c..04d4ecb1be7d308c08540455ba599e3f3bc5bf3a 100644 --- a/include/client/syscalls/decoder.hpp +++ b/include/client/syscalls/decoder.hpp @@ -60,7 +60,8 @@ decode(FmtBuffer& buffer, const long syscall_number, const auto sc = lookup_by_number(syscall_number, argv); - fmt::format_to(std::back_inserter(buffer), "{}(", sc.name()); + fmt::format_to(std::back_inserter(buffer), "{} {}(", sc.name(), + syscall_number); for(int i = 0; i < sc.num_args(); ++i) { const auto arg = sc.args().at(i); diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 17dd464808a330bf15449ccbfad9ab899f99645d..fc85399afd98467eb249a3039b6de586b3bcc668 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -140,12 +140,6 @@ namespace gkfs::syscall { int gkfs_open(const std::string& path, mode_t mode, int flags) { - if(flags & O_PATH) { - LOG(ERROR, "`O_PATH` flag is not supported"); - errno = ENOTSUP; - return -1; - } - // metadata object filled during create or stat gkfs::metadata::Metadata md{}; if(flags & O_CREAT) { @@ -1643,7 +1637,9 @@ gkfs_close(unsigned 
int fd) { if(CTX->is_internal_fd(fd)) { // the client application (for some reason) is trying to close an // internal fd: ignore it - return 0; + LOG(ERROR, "{}() closing an internal fd '{}'", __func__, fd); + errno = EACCES; + return -1; } return -1; diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 1086010e884214ecbcadb1e83790e54a57b52b2b..45346fdc8bc51fdc7c02f813363148ca5f782c4a 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -31,7 +31,7 @@ #include #include #include - +#include #include #include @@ -408,11 +408,25 @@ hook_internal(long syscall_number, long arg0, long arg1, long arg2, long arg3, CTX->unregister_internal_fd(arg0); } break; - +#ifdef SYS_close_range + case SYS_close_range: + *result = syscall_no_intercept_wrapper( + syscall_number, static_cast(arg0), + static_cast(arg1), static_cast(arg2)); + if(*result >= 0) { + for(auto i = arg0; i < arg1; i++) { + if(arg1 == INT_MAX or i >= GKFS_MAX_INTERNAL_FDS) { + break; + } + CTX->unregister_internal_fd(i); + } + } + break; +#endif default: - // ignore any other syscalls, i.e.: pass them on to the kernel - // (syscalls forwarded to the kernel that return are logged in - // hook_forwarded_syscall()) + // ignore any other syscalls, i.e.: pass them on to the + // kernel (syscalls forwarded to the kernel that return are + // logged in hook_forwarded_syscall()) ::save_current_syscall_info(gkfs::syscall::from_internal_code | gkfs::syscall::to_kernel | gkfs::syscall::not_executed); @@ -430,7 +444,8 @@ hook_internal(long syscall_number, long arg0, long arg1, long arg2, long arg3, /* * hook -- interception hook for application syscalls * - * This hook is used to implement any application filesystem-related syscalls. + * This hook is used to implement any application filesystem-related + * syscalls. 
*/ inline int hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, @@ -486,6 +501,19 @@ hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, case SYS_close: *result = gkfs::hook::hook_close(static_cast(arg0)); break; +#ifdef SYS_close_range + case SYS_close_range: + for(auto i = arg0; i <= arg1; i++) { + if(i >= GKFS_MAX_OPEN_FDS) + break; + if(CTX->file_map()->exist(i)) { + gkfs::syscall::gkfs_close(i); + } + *result = 0; + } + *result = 0; + break; +#endif // SYS_close_range #ifdef SYS_stat case SYS_stat: *result = diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index ff38514923a10134e4ace0864f0b3ad9f6608bca..a15813b2513a66cff49f60e67f41912930865bce 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -541,7 +541,8 @@ PreloadContext::register_internal_fd(int fd) { void PreloadContext::unregister_internal_fd(int fd) { - LOG(DEBUG, "unregistering internal fd {}", fd); + LOG(DEBUG, "unregistering internal fd {} >= {} -> {}'", fd, MIN_INTERNAL_FD, + fd >= MIN_INTERNAL_FD); assert(fd >= MIN_INTERNAL_FD); diff --git a/src/client/syscalls/detail/syscall_info.c b/src/client/syscalls/detail/syscall_info.c index 8c6ca4feecffebf0788e490eeea9adbdc56556e7..954486f54cfeb7d1d3a1a94cb42acd36303a278e 100644 --- a/src/client/syscalls/detail/syscall_info.c +++ b/src/client/syscalls/detail/syscall_info.c @@ -478,9 +478,6 @@ SYSCALL(getpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), SYSCALL(readlinkat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsiz")), SYSCALL(fchmodat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "filename"), S_NARG(octal_mode, "mode")), SYSCALL(faccessat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), -#ifdef SYS_faccessat2 - SYSCALL(faccessat2, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode"), S_NARG(arg, 
"flags")), -#endif SYSCALL(pselect6, 6, S_RET(rdec), S_NARG(dec, "nfds"), S_NARG(ptr, "readfds"), S_NARG(ptr, "writefds"), S_NARG(ptr, "exceptfds"), S_NARG(ptr, "timeval"), S_NARG(ptr, "sigmask")), SYSCALL(ppoll, 5, S_RET(rdec), S_NARG(ptr, "fds"), S_NARG(dec, "nfds"), S_NARG(ptr, "tmo_p"), S_NARG(ptr, "sigmask"), S_NARG(dec, "sigsetsize")), SYSCALL(unshare, 1, S_RET(rdec), S_NARG(arg, "unshare_flags")), @@ -596,8 +593,108 @@ SYSCALL(getpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), #endif // SYS_io_pgetevents #ifdef SYS_rseq - SYSCALL(rseq, 4, S_RET(rdec), S_NARG(ptr, "rseq"), S_NARG(dec, "rseq_len"), S_NARG(arg, "flags"), S_NARG(signum, "sig")) + SYSCALL(rseq, 4, S_RET(rdec), S_NARG(ptr, "rseq"), S_NARG(dec, "rseq_len"), S_NARG(arg, "flags"), S_NARG(signum, "sig")), #endif // SYS_rseq +// ifdef the next syscalls +#ifdef SYS_pidfd_send_signal + SYSCALL(pidfd_send_signal, 3, S_RET(rdec), S_NARG(dec, "pidfd"), S_NARG(signum, "sig"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_io_uring_setup + SYSCALL(io_uring_setup, 2, S_RET(rdec), S_NARG(arg, "entries"), S_NARG(ptr, "ring_addr")), +#endif +#ifdef SYS_io_uring_enter + SYSCALL(io_uring_enter, 4, S_RET(rdec), S_NARG(arg, "ring_fd"), S_NARG(dec, "to_submit"), S_NARG(dec, "min_complete"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_io_uring_register + SYSCALL(io_uring_register, 4, S_RET(rdec), S_NARG(arg, "ring_fd"), S_NARG(arg, "opcode"), S_NARG(ptr, "arg"), S_NARG(arg, "nr_args")), +#endif +#ifdef SYS_open_tree + SYSCALL(open_tree, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags")), +#endif +#ifdef SYS_move_mount + SYSCALL(move_mount, 3, S_RET(rdec), S_NARG(cstr, "src"), S_NARG(cstr, "dst"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_fsopen + SYSCALL(fsopen, 3, S_RET(rdec), S_NARG(cstr, "fs_type"), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags")), +#endif +#ifdef SYS_fsconfig + SYSCALL(fsconfig, 3, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(ptr, "argp"), S_NARG(ptr, "resp")), +#endif +#ifdef 
SYS_fsmount + SYSCALL(fsmount, 5, S_RET(rdec), S_NARG(cstr, "fs_type"), S_NARG(cstr, "pathname"), S_NARG(cstr, "type"), S_NARG(arg, "flags"), S_NARG(ptr, "data")), +#endif +#ifdef SYS_fspick + SYSCALL(fspick, 4, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3")), +#endif +#ifdef SYS_pidfd_open + SYSCALL(pidfd_open, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_clone3 + SYSCALL(clone3, 4, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(ptr, "child_tid"), S_NARG(ptr, "parent_tid"), S_NARG(ptr, "tls")), +#endif +#ifdef SYS_close_range + SYSCALL(close_range, 3, S_RET(rdec), S_NARG(dec, "low"), S_NARG(dec, "high"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_openat2 + SYSCALL(openat2, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags"), S_NARG(ptr, "how")), +#endif +#ifdef SYS_pidfd_getfd + SYSCALL(pidfd_getfd, 3, S_RET(rdec), S_NARG(dec, "pidfd"), S_NARG(arg, "fd"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_faccessat2 + SYSCALL(faccessat2, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_process_madvise + SYSCALL(process_madvise, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "advice")), +#endif +#ifdef SYS_epoll_pwait2 + SYSCALL(epoll_pwait2, 6, S_RET(rdec), S_NARG(fd, "epfd"), S_NARG(ptr, "events"), S_NARG(dec, "maxevents"), S_NARG(dec, "timeout"), S_NARG(ptr, "sigmask"), S_NARG(dec, "sigsetsize")), +#endif +#ifdef SYS_mount_setattr + SYSCALL(mount_setattr, 3, S_RET(rdec), S_NARG(cstr, "path"), S_NARG(ptr, "attr"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_quotactl_fd + SYSCALL(quotactl_fd, 4, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(fd, "fd"), S_NARG(arg, "id"), S_NARG(ptr, "addr")), +#endif +#ifdef SYS_landlock_create_ruleset + SYSCALL(landlock_create_ruleset, 1, S_RET(rdec), S_NARG(arg, "flags")), +#endif +#ifdef 
SYS_landlock_add_rule + SYSCALL(landlock_add_rule, 3, S_RET(rdec), S_NARG(dec, "ruleset"), S_NARG(arg, "rule"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_landlock_restrict_self + SYSCALL(landlock_restrict_self, 1, S_RET(rdec), S_NARG(dec, "ruleset")), +#endif +#ifdef SYS_memfd_secret + SYSCALL(memfd_secret, 3, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "flags"), S_NARG(ptr, "secret")), +#endif +#ifdef SYS_process_mrelease + SYSCALL(process_mrelease, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "addr")), +#endif +#ifdef SYS_futex_waitv + SYSCALL(futex_waitv, 5, S_RET(rdec), S_NARG(ptr, "uaddr"), S_NARG(arg, "op"), S_NARG(ptr, "val"), S_NARG(ptr, "timeout"), S_NARG(dec, "flags")), +#endif +#ifdef SYS_set_mempolicy_home_node + SYSCALL(set_mempolicy_home_node, 1, S_RET(rdec), S_NARG(arg, "node")), +#endif +#ifdef SYS_cachestat + SYSCALL(cachestat, 1, S_RET(rdec), S_NARG(ptr, "cs")), +#endif +#ifdef SYS_fchmodat2 + SYSCALL(fchmodat2, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_map_shadow_stack + SYSCALL(map_shadow_stack, 1, S_RET(rdec), S_NARG(arg, "flags")), +#endif +#ifdef SYS_futex_wake + SYSCALL(futex_wake, 3, S_RET(rdec), S_NARG(ptr, "uaddr"), S_NARG(dec, "nr_wake"), S_NARG(arg, "flags")), +#endif +#ifdef SYS_futex_wait + SYSCALL(futex_wait, 4, S_RET(rdec), S_NARG(ptr, "uaddr"), S_NARG(arg, "op"), S_NARG(ptr, "val"), S_NARG(ptr, "timeout")), +#endif +#ifdef SYS_futex_requeue + SYSCALL(futex_requeue, 5, S_RET(rdec), S_NARG(ptr, "uaddr1"), S_NARG(ptr, "uaddr2"), S_NARG(arg, "op"), S_NARG(ptr, "val"), S_NARG(ptr, "timeout")) +#endif }; static const struct syscall_info unknown_syscall = { @@ -742,7 +839,13 @@ const struct named_syscall_entry syscalls_by_name[] = { SYSCALL_BY_NAME(clock_nanosleep), SYSCALL_BY_NAME(clock_settime), SYSCALL_BY_NAME(clone), +#ifdef SYS_clone3 + SYSCALL_BY_NAME(clone3), +#endif SYSCALL_BY_NAME(close), +#ifdef SYS_close_range + 
SYSCALL_BY_NAME(close_range), +#endif SYSCALL_BY_NAME(connect), #ifdef SYS_copy_file_range SYSCALL_BY_NAME(copy_file_range), diff --git a/tests/integration/syscalls/test_error_operations.py b/tests/integration/syscalls/test_error_operations.py index 97ae68889b3dc2ca86ff762b9fce194422799c9c..f8c3285cb878de555e9ceb4f728f058dd95946d7 100644 --- a/tests/integration/syscalls/test_error_operations.py +++ b/tests/integration/syscalls/test_error_operations.py @@ -46,8 +46,8 @@ def test_open_error(gkfs_daemon, gkfs_client): file = gkfs_daemon.mountdir / "file" file2 = gkfs_daemon.mountdir / "file2" file3 = gkfs_daemon.mountdir / "file3" - - flags = [os.O_PATH, os.O_CREAT | os.O_DIRECTORY] + # O_PATH is no longer rejected by gkfs_open (cp uses it), so it is not an error case here. + flags = [os.O_CREAT | os.O_DIRECTORY] # create a file in gekkofs for flag in flags: