From d36b892dbbeef1a5ea60163040e6148d9981de6c Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 1 Feb 2024 10:49:15 +0100 Subject: [PATCH 1/3] Mountdir existence is no longer required for clients Previously, an empty mountdir directory had to exist for no reason. As a result, the daemon created and removed this empty directory. This has been removed. Unfinished: optimizing path resolution performance. Moving path code from preload context, updating hooks.cpp with new method. Fix merge conflicts. Not functional. --- include/client/path.hpp | 12 + include/client/preload_context.hpp | 11 - include/config.hpp | 10 + src/client/hooks.cpp | 551 ++++++++++++++++------------- src/client/path.cpp | 261 +++++++++++--- src/client/preload_context.cpp | 144 ++++---- src/daemon/daemon.cpp | 28 +- 7 files changed, 623 insertions(+), 394 deletions(-) diff --git a/include/client/path.hpp b/include/client/path.hpp index 2294c28a4..528f51fb4 100644 --- a/include/client/path.hpp +++ b/include/client/path.hpp @@ -32,6 +32,8 @@ namespace gkfs::path { +enum class NormalizeStatus { ok, fd_unknown, fd_not_a_dir }; + unsigned int match_components(const std::string& path, unsigned int& path_components, const std::vector& components); @@ -40,6 +42,16 @@ bool resolve(const std::string& path, std::string& resolved, bool resolve_last_link = true); +NormalizeStatus +normalize(int dirfd, const char* raw_path, std::string& normalized_path, + bool resolve_last_link = true); + +std::string +normalize(const char* raw_path, bool resolve_last_link = true); + +bool +is_in_gkfs(std::string& path, bool cut_mountdir_prefix = false); + std::string get_sys_cwd(); diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 4be2eeeb0..60f635d3b 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -70,8 +70,6 @@ struct FsConfig { std::string rootdir; }; -enum class RelativizeStatus { internal, external, fd_unknown, fd_not_a_dir }; - /** * 
Singleton class of the client context with all relevant global data */ @@ -169,15 +167,6 @@ public: void auto_sm(bool auto_sm); - RelativizeStatus - relativize_fd_path(int dirfd, const char* raw_path, - std::string& relative_path, int flags = 0, - bool resolve_last_link = true) const; - - bool - relativize_path(const char* raw_path, std::string& relative_path, - bool resolve_last_link = true) const; - const std::shared_ptr& file_map() const; diff --git a/include/config.hpp b/include/config.hpp index ad162fb6a..17f2bd952 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -109,6 +109,16 @@ namespace rocksdb { constexpr auto use_write_ahead_log = false; } // namespace rocksdb +namespace preload { +/* + * This option allows symlinks located outside of the GekkoFS namespace to + * point into the GekkoFS namespace. It is disabled by default because it + * incurs a significant overhead during GekkoFS' path resolution. Therefore, + * it is recommended to keep it disabled. + */ +constexpr auto allow_symlinks_into_gkfs = false; +} // namespace preload + namespace stats { constexpr auto max_stats = 1000000; ///< How many stats will be stored constexpr auto prometheus_gateway = "127.0.0.1:9091"; diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 1ab78c8ba..93864a1e0 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -37,12 +37,15 @@ #include +#include #include +#include extern "C" { #include #include #include +#include } namespace { @@ -60,28 +63,30 @@ namespace gkfs::hook { int hook_openat(int dirfd, const char* cpath, int flags, mode_t mode) { - LOG(DEBUG, "{}() called with fd: {}, path: \"{}\", flags: {}, mode: {}", + LOG(INFO, "{}() called with fd: {}, path: \"{}\", flags: {}, mode: {}", __func__, dirfd, cpath, flags, mode); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: + std::string normalized_path{}; + auto status = 
gkfs::path::normalize(dirfd, cpath, normalized_path); + + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: return syscall_no_intercept_wrapper(SYS_openat, dirfd, cpath, flags, mode); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_openat, dirfd, - resolved.c_str(), flags, mode); - - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - return with_errno(gkfs::syscall::gkfs_open(resolved, mode, flags)); - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno( + gkfs::syscall::gkfs_open(normalized_path, mode, flags)); + else + return syscall_no_intercept_wrapper(SYS_openat, dirfd, + normalized_path.c_str(), + flags, mode); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -112,12 +117,12 @@ hook_stat(const char* path, struct stat* buf) { LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", __func__, path, fmt::ptr(buf)); - std::string rel_path; - if(CTX->relativize_path(path, rel_path, false)) { - return with_errno(gkfs::syscall::gkfs_stat(rel_path, buf)); - } - - return syscall_no_intercept_wrapper(SYS_stat, rel_path.c_str(), buf); + auto normalized_path = gkfs::path::normalize(path, false); + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno(gkfs::syscall::gkfs_stat(normalized_path, buf)); + else + return syscall_no_intercept_wrapper(SYS_stat, normalized_path.c_str(), + buf); } #endif @@ -131,30 +136,29 @@ hook_statx(int dirfd, const char* path, int flags, unsigned int mask, "{}() called with dirfd: '{}', path: \"{}\", flags: '{}', mask: '{}', buf: '{}'", __func__, dirfd, path, flags, mask, fmt::ptr(buf)); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, path, resolved); - switch(rstatus) { - 
case gkfs::preload::RelativizeStatus::fd_unknown: + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, path, normalized_path); + + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: return syscall_no_intercept_wrapper(SYS_statx, dirfd, path, flags, mask, buf); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper( - SYS_statx, dirfd, resolved.c_str(), flags, mask, buf); - - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - return with_errno(gkfs::syscall::gkfs_statx(dirfd, resolved.c_str(), - flags, mask, buf)); - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno(gkfs::syscall::gkfs_statx( + dirfd, normalized_path.c_str(), flags, mask, buf)); + else + return syscall_no_intercept_wrapper(SYS_statx, dirfd, + normalized_path.c_str(), + flags, mask, buf); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } - - return syscall_no_intercept(SYS_statx, dirfd, path, flags, mask, buf); } #endif @@ -166,11 +170,12 @@ hook_lstat(const char* path, struct stat* buf) { LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", __func__, path, fmt::ptr(buf)); - std::string rel_path; - if(CTX->relativize_path(path, rel_path)) { - return with_errno(gkfs::syscall::gkfs_stat(rel_path, buf)); - } - return syscall_no_intercept_wrapper(SYS_lstat, rel_path.c_str(), buf); + auto normalized_path = gkfs::path::normalize(path); + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno(gkfs::syscall::gkfs_stat(normalized_path, buf)); + else + return syscall_no_intercept_wrapper(SYS_lstat, normalized_path.c_str(), + buf); } #endif @@ -200,25 +205,32 @@ hook_fstatat(int dirfd, const char* cpath, struct stat* buf, int flags) { LOG(DEBUG, "{}() 
called with path: \"{}\", fd: {}, buf: {}, flags: {}", __func__, cpath, dirfd, fmt::ptr(buf), flags); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved, flags); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: + if(flags & AT_EMPTY_PATH) { + LOG(ERROR, "{}() AT_EMPTY_PATH flag not supported", __func__); + return -ENOTSUP; + } + + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); + + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: return syscall_no_intercept_wrapper(SYS_newfstatat, dirfd, cpath, buf, flags); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_newfstatat, dirfd, - resolved.c_str(), buf, flags); - - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - return with_errno(gkfs::syscall::gkfs_stat(resolved, buf)); - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno( + gkfs::syscall::gkfs_stat(normalized_path, buf)); + else + return syscall_no_intercept_wrapper(SYS_newfstatat, dirfd, + normalized_path.c_str(), + buf, flags); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -232,7 +244,8 @@ hook_read(unsigned int fd, void* buf, size_t count) { if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_read(fd, buf, count)); } - return syscall_no_intercept_wrapper(SYS_read, fd, buf, count); + return static_cast( + syscall_no_intercept_wrapper(SYS_read, fd, buf, count)); } int @@ -258,7 +271,8 @@ hook_readv(unsigned long fd, const struct iovec* iov, unsigned long iovcnt) { if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_readv(fd, iov, iovcnt)); } - return 
syscall_no_intercept_wrapper(SYS_readv, fd, iov, iovcnt); + return static_cast( + syscall_no_intercept_wrapper(SYS_readv, fd, iov, iovcnt)); } int @@ -274,7 +288,8 @@ hook_preadv(unsigned long fd, const struct iovec* iov, unsigned long iovcnt, if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_preadv(fd, iov, iovcnt, pos_l)); } - return syscall_no_intercept_wrapper(SYS_preadv, fd, iov, iovcnt, pos_l); + return static_cast( + syscall_no_intercept_wrapper(SYS_preadv, fd, iov, iovcnt, pos_l)); } int @@ -284,9 +299,11 @@ hook_write(unsigned int fd, const char* buf, size_t count) { fmt::ptr(buf), count); if(CTX->file_map()->exist(fd)) { - return with_errno(gkfs::syscall::gkfs_write(fd, buf, count)); + return static_cast( + with_errno(gkfs::syscall::gkfs_write(fd, buf, count))); } - return syscall_no_intercept_wrapper(SYS_write, fd, buf, count); + return static_cast( + syscall_no_intercept_wrapper(SYS_write, fd, buf, count)); } int @@ -296,11 +313,13 @@ hook_pwrite(unsigned int fd, const char* buf, size_t count, loff_t pos) { fd, fmt::ptr(buf), count, pos); if(CTX->file_map()->exist(fd)) { - return with_errno(gkfs::syscall::gkfs_pwrite_ws(fd, buf, count, pos)); + return static_cast( + with_errno(gkfs::syscall::gkfs_pwrite_ws(fd, buf, count, pos))); } /* Since kernel 2.6: pread() became pread64(), and pwrite() became * pwrite64(). 
*/ - return syscall_no_intercept_wrapper(SYS_pwrite64, fd, buf, count, pos); + return static_cast( + syscall_no_intercept_wrapper(SYS_pwrite64, fd, buf, count, pos)); } int @@ -310,9 +329,11 @@ hook_writev(unsigned long fd, const struct iovec* iov, unsigned long iovcnt) { fmt::ptr(iov), iovcnt); if(CTX->file_map()->exist(fd)) { - return with_errno(gkfs::syscall::gkfs_writev(fd, iov, iovcnt)); + return static_cast( + with_errno(gkfs::syscall::gkfs_writev(fd, iov, iovcnt))); } - return syscall_no_intercept_wrapper(SYS_writev, fd, iov, iovcnt); + return static_cast( + syscall_no_intercept_wrapper(SYS_writev, fd, iov, iovcnt)); } int @@ -326,9 +347,11 @@ hook_pwritev(unsigned long fd, const struct iovec* iov, unsigned long iovcnt, __func__, fd, fmt::ptr(iov), iovcnt, pos_l, pos_h); if(CTX->file_map()->exist(fd)) { - return with_errno(gkfs::syscall::gkfs_pwritev(fd, iov, iovcnt, pos_l)); + return static_cast(with_errno( + gkfs::syscall::gkfs_pwritev(fd, iov, iovcnt, pos_l))); } - return syscall_no_intercept_wrapper(SYS_pwritev, fd, iov, iovcnt, pos_l); + return static_cast( + syscall_no_intercept_wrapper(SYS_pwritev, fd, iov, iovcnt, pos_l)); } int @@ -342,64 +365,66 @@ hook_unlinkat(int dirfd, const char* cpath, int flags) { return -EINVAL; } - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved, false); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_unlinkat, dirfd, cpath, - flags); + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path, false); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_unlinkat, dirfd, - resolved.c_str(), flags); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_unlinkat, dirfd, cpath, flags)); - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case 
gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - if(flags & AT_REMOVEDIR) { - return with_errno(gkfs::syscall::gkfs_rmdir(resolved)); - } else { - return with_errno(gkfs::syscall::gkfs_remove(resolved)); - } - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) { + if(flags & AT_REMOVEDIR) { + return with_errno( + gkfs::syscall::gkfs_rmdir(normalized_path)); + } else { + return with_errno( + gkfs::syscall::gkfs_remove(normalized_path)); + } + } else + return static_cast(syscall_no_intercept_wrapper( + SYS_unlinkat, dirfd, normalized_path.c_str(), flags)); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } int hook_symlinkat(const char* oldname, int newdfd, const char* newname) { - LOG(DEBUG, "{}() called with oldname: \"{}\", newfd: {}, newname: \"{}\"", __func__, oldname, newdfd, newname); - std::string oldname_resolved; - if(CTX->relativize_path(oldname, oldname_resolved)) { + auto old_normalized_path = gkfs::path::normalize(oldname); + if(gkfs::path::is_in_gkfs(old_normalized_path)) { LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; } - std::string newname_resolved; - auto rstatus = - CTX->relativize_fd_path(newdfd, newname, newname_resolved, false); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_symlinkat, oldname, newdfd, - newname); + std::string new_normalized_path{}; + auto status = + gkfs::path::normalize(newdfd, newname, new_normalized_path, false); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_symlinkat, oldname, newdfd, - newname_resolved.c_str()); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_symlinkat, oldname, newdfd, newname)); - case 
gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - LOG(WARNING, "{}() operation not supported", __func__); - return -ENOTSUP; - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(new_normalized_path)) { + LOG(WARNING, "{}() operation not supported", __func__); + return -ENOTSUP; + } else + return static_cast(syscall_no_intercept_wrapper( + SYS_symlinkat, oldname, newdfd, + new_normalized_path.c_str())); default: LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; @@ -423,15 +448,16 @@ hook_access(const char* path, int mask) { LOG(DEBUG, "{}() called path: \"{}\", mask: {}", __func__, path, mask); - std::string rel_path; - if(CTX->relativize_path(path, rel_path)) { - auto ret = gkfs::syscall::gkfs_access(rel_path, mask); + auto normalized_path = gkfs::path::normalize(path); + if(gkfs::path::is_in_gkfs(normalized_path, true)) { + auto ret = gkfs::syscall::gkfs_access(normalized_path, mask); if(ret < 0) { return -errno; } return ret; } - return syscall_no_intercept_wrapper(SYS_access, rel_path.c_str(), mask); + return static_cast(syscall_no_intercept_wrapper( + SYS_access, normalized_path.c_str(), mask)); } #endif @@ -441,25 +467,26 @@ hook_faccessat(int dirfd, const char* cpath, int mode) { LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", mode: {}", __func__, dirfd, cpath, mode); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_faccessat, dirfd, cpath, - mode); + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_faccessat, dirfd, - resolved.c_str(), mode); + switch(status) { + case 
gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_faccessat, dirfd, cpath, mode)); - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - return with_errno(gkfs::syscall::gkfs_access(resolved, mode)); - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno( + gkfs::syscall::gkfs_access(normalized_path, mode)); + else + return static_cast(syscall_no_intercept_wrapper( + SYS_faccessat, dirfd, normalized_path.c_str(), mode)); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -472,27 +499,27 @@ hook_faccessat2(int dirfd, const char* cpath, int mode, int flags) { "{}() called with dirfd: '{}', path: '{}', mode: '{}', flags: '{}'", __func__, dirfd, cpath, mode, flags); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_faccessat2, dirfd, cpath, - mode, flags); + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_faccessat2, dirfd, - resolved.c_str(), mode, flags); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_faccessat2, dirfd, cpath, mode, flags)); - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - // we do not use permissions and therefore do not handle `flags` for - // now - return with_errno(gkfs::syscall::gkfs_access(resolved, mode)); - + case 
gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno( + gkfs::syscall::gkfs_access(normalized_path, mode)); + else + return static_cast(syscall_no_intercept_wrapper( + SYS_faccessat2, dirfd, normalized_path.c_str(), mode, + flags)); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -523,11 +550,13 @@ hook_truncate(const char* path, long length) { LOG(DEBUG, "{}() called with path: {}, offset: {}", __func__, path, length); - std::string rel_path; - if(CTX->relativize_path(path, rel_path)) { - return with_errno(gkfs::syscall::gkfs_truncate(rel_path, length)); - } - return syscall_no_intercept_wrapper(SYS_truncate, rel_path.c_str(), length); + auto normalized_path = gkfs::path::normalize(path); + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno( + gkfs::syscall::gkfs_truncate(normalized_path, length)); + else + return static_cast(syscall_no_intercept_wrapper( + SYS_truncate, normalized_path.c_str(), length)); } int @@ -539,7 +568,8 @@ hook_ftruncate(unsigned int fd, unsigned long length) { auto path = CTX->file_map()->get(fd)->path(); return with_errno(gkfs::syscall::gkfs_truncate(path, length)); } - return syscall_no_intercept_wrapper(SYS_ftruncate, fd, length); + return static_cast( + syscall_no_intercept_wrapper(SYS_ftruncate, fd, length)); } int @@ -550,7 +580,7 @@ hook_dup(unsigned int fd) { if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_dup(fd)); } - return syscall_no_intercept_wrapper(SYS_dup, fd); + return static_cast(syscall_no_intercept_wrapper(SYS_dup, fd)); } #ifdef SYS_dup2 int @@ -561,7 +591,8 @@ hook_dup2(unsigned int oldfd, unsigned int newfd) { if(CTX->file_map()->exist(oldfd)) { return with_errno(gkfs::syscall::gkfs_dup2(oldfd, newfd)); } - return syscall_no_intercept_wrapper(SYS_dup2, oldfd, newfd); + return static_cast( + 
syscall_no_intercept_wrapper(SYS_dup2, oldfd, newfd)); } #endif int @@ -576,7 +607,8 @@ hook_dup3(unsigned int oldfd, unsigned int newfd, int flags) { LOG(WARNING, "{}() Not supported", __func__); return -ENOTSUP; } - return syscall_no_intercept_wrapper(SYS_dup3, oldfd, newfd, flags); + return static_cast( + syscall_no_intercept_wrapper(SYS_dup3, oldfd, newfd, flags)); } #ifdef SYS_getdents int @@ -588,7 +620,8 @@ hook_getdents(unsigned int fd, struct linux_dirent* dirp, unsigned int count) { if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_getdents(fd, dirp, count)); } - return syscall_no_intercept_wrapper(SYS_getdents, fd, dirp, count); + return static_cast( + syscall_no_intercept_wrapper(SYS_getdents, fd, dirp, count)); } #endif @@ -602,7 +635,8 @@ hook_getdents64(unsigned int fd, struct linux_dirent64* dirp, if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_getdents64(fd, dirp, count)); } - return syscall_no_intercept_wrapper(SYS_getdents64, fd, dirp, count); + return static_cast( + syscall_no_intercept_wrapper(SYS_getdents64, fd, dirp, count)); } @@ -612,26 +646,26 @@ hook_mkdirat(int dirfd, const char* cpath, mode_t mode) { LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", mode: {}", __func__, dirfd, cpath, mode); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_mkdirat, dirfd, - resolved.c_str(), mode); + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_mkdirat, dirfd, cpath, - mode); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_mkdirat, dirfd, cpath, mode)); - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case 
gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - return with_errno( - gkfs::syscall::gkfs_create(resolved, mode | S_IFDIR)); - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path, true)) + return with_errno(gkfs::syscall::gkfs_create(normalized_path, + mode | S_IFDIR)); + else + return static_cast(syscall_no_intercept_wrapper( + SYS_mkdirat, dirfd, normalized_path.c_str(), mode)); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -642,26 +676,26 @@ hook_fchmodat(int dirfd, const char* cpath, mode_t mode) { LOG(DEBUG, "{}() called dirfd: {}, path: \"{}\", mode: {}", __func__, dirfd, cpath, mode); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_fchmodat, dirfd, cpath, - mode); + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_fchmodat, dirfd, - resolved.c_str(), mode); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_fchmodat, dirfd, cpath, mode)); - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - LOG(WARNING, "{}() operation not supported", __func__); - return -ENOTSUP; - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path)) { + LOG(WARNING, "{}() operation not supported", __func__); + return -ENOTSUP; + } else + return static_cast(syscall_no_intercept_wrapper( + SYS_fchmodat, dirfd, normalized_path.c_str(), mode)); default: - LOG(ERROR, "{}() 
relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -675,7 +709,7 @@ hook_fchmod(unsigned int fd, mode_t mode) { LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; } - return syscall_no_intercept_wrapper(SYS_fchmod, fd, mode); + return static_cast(syscall_no_intercept_wrapper(SYS_fchmod, fd, mode)); } int @@ -683,11 +717,11 @@ hook_chdir(const char* path) { LOG(DEBUG, "{}() called with path: \"{}\"", __func__, path); - std::string rel_path; - bool internal = CTX->relativize_path(path, rel_path); + auto normalized_path = gkfs::path::normalize(path); + auto internal = gkfs::path::is_in_gkfs(normalized_path, true); if(internal) { // path falls in our namespace - auto md = gkfs::utils::get_metadata(rel_path); + auto md = gkfs::utils::get_metadata(normalized_path); if(!md) { LOG(ERROR, "{}() path {} errno {}", __func__, path, errno); return -errno; @@ -697,16 +731,16 @@ hook_chdir(const char* path) { LOG(ERROR, "{}() path is not a directory", __func__); return -ENOTDIR; } - // TODO get complete path from relativize_path instead of - // removing mountdir and then adding again here - rel_path.insert(0, CTX->mountdir()); - if(gkfs::path::has_trailing_slash(rel_path)) { + // readd mountdir prefix again for setting current working directory + normalized_path.insert(0, CTX->mountdir()); + if(gkfs::path::has_trailing_slash(normalized_path)) { // open_dir is '/' - rel_path.pop_back(); + normalized_path.pop_back(); } } + // TODO fix with LEAF try { - gkfs::path::set_cwd(rel_path, internal); + gkfs::path::set_cwd(normalized_path, internal); } catch(const std::system_error& se) { return -(se.code().value()); } @@ -763,7 +797,7 @@ hook_getcwd(char* buf, unsigned long size) { } strcpy(buf, CTX->cwd().c_str()); - return (CTX->cwd().size() + 1); + return static_cast((CTX->cwd().size() + 1)); } int @@ -772,26 +806,27 @@ hook_readlinkat(int dirfd, const char* cpath, char* buf, int bufsiz) { 
LOG(DEBUG, "{}() called with dirfd: {}, path \"{}\", buf: {}, bufsize: {}", __func__, dirfd, cpath, fmt::ptr(buf), bufsiz); - std::string resolved; - auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved, false); - switch(rstatus) { - case gkfs::preload::RelativizeStatus::fd_unknown: - return syscall_no_intercept_wrapper(SYS_readlinkat, dirfd, cpath, - buf, bufsiz); + std::string normalized_path{}; + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path, false); - case gkfs::preload::RelativizeStatus::external: - return syscall_no_intercept_wrapper(SYS_readlinkat, dirfd, - resolved.c_str(), buf, bufsiz); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + return static_cast(syscall_no_intercept_wrapper( + SYS_readlinkat, dirfd, cpath, buf, bufsiz)); - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - LOG(WARNING, "{}() not supported", __func__); - return -ENOTSUP; - + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(normalized_path)) { + LOG(WARNING, "{}() not supported", __func__); + return -ENOTSUP; + } else + return static_cast(syscall_no_intercept_wrapper( + SYS_readlinkat, dirfd, normalized_path.c_str(), buf, + bufsiz)); default: - LOG(ERROR, "{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } @@ -803,7 +838,8 @@ hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { arg); if(!CTX->file_map()->exist(fd)) { - return syscall_no_intercept_wrapper(SYS_fcntl, fd, cmd, arg); + return static_cast( + syscall_no_intercept_wrapper(SYS_fcntl, fd, cmd, arg)); } int ret; switch(cmd) { @@ -878,64 +914,85 @@ hook_renameat(int olddfd, const char* oldname, int newdfd, const char* newname, "newname \"{}\", flags {}", __func__, olddfd, oldname, newdfd, newname, flags); - const char* oldpath_pass; - std::string 
oldpath_resolved; - auto oldpath_status = - CTX->relativize_fd_path(olddfd, oldname, oldpath_resolved); - switch(oldpath_status) { - case gkfs::preload::RelativizeStatus::fd_unknown: - oldpath_pass = oldname; - break; + const char* oldpath_pass = nullptr; + std::string oldpath_normalized{}; + auto status = gkfs::path::normalize(olddfd, oldname, oldpath_normalized); + auto oldpath_in_gkfs = false; - case gkfs::preload::RelativizeStatus::external: - oldpath_pass = oldpath_resolved.c_str(); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + oldpath_pass = oldname; break; - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: - break; + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(oldpath_normalized)) { + oldpath_in_gkfs = true; + break; + } else { + oldpath_pass = oldpath_normalized.c_str(); + break; + } default: LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } - const char* newpath_pass; - std::string newpath_resolved; - auto newpath_status = - CTX->relativize_fd_path(newdfd, newname, newpath_resolved); - switch(newpath_status) { - case gkfs::preload::RelativizeStatus::fd_unknown: - newpath_pass = newname; - break; + const char* newpath_pass = nullptr; + std::string newpath_normalized{}; + status = gkfs::path::normalize(newdfd, newname, newpath_normalized); - case gkfs::preload::RelativizeStatus::external: - newpath_pass = newpath_resolved.c_str(); + switch(status) { + case gkfs::path::NormalizeStatus::fd_unknown: + newpath_pass = newname; break; - case gkfs::preload::RelativizeStatus::fd_not_a_dir: + case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::preload::RelativizeStatus::internal: + case gkfs::path::NormalizeStatus::ok: + if(gkfs::path::is_in_gkfs(newpath_normalized)) { + if(oldpath_in_gkfs) { + // both old and new path are in gkfs #ifdef HAS_RENAME - 
if(oldpath_status == gkfs::preload::RelativizeStatus::internal) { - return with_errno(gkfs::syscall::gkfs_rename(oldpath_resolved, - newpath_resolved)); - } else { - return -ENOTSUP; - } + return with_errno(gkfs::syscall::gkfs_rename( + oldpath_normalized, newpath_normalized)); #else - return -ENOTSUP; + LOG(WARNING, + "{}() Rename inside gkfs but disabled. Consult CMake options.", + __func__); + return -ENOTSUP; #endif + } else { + LOG(WARNING, + "{}() Renaming from other file system to gkfs not supported", + __func__); + return -ENOTSUP; + } + } else { + // old path in gkfs is not allowed + if(oldpath_in_gkfs) { + LOG(WARNING, + "{}() Renaming from gkfs to other file system not supported", + __func__); + return -ENOTSUP; + } else { + // rename operation is completely outside the gkfs namespace + newpath_pass = newpath_normalized.c_str(); + break; + } + } + default: LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } - return syscall_no_intercept_wrapper(SYS_renameat2, olddfd, oldpath_pass, - newdfd, newpath_pass, flags); + return static_cast(syscall_no_intercept_wrapper( + SYS_renameat2, olddfd, oldpath_pass, newdfd, newpath_pass, flags)); } int @@ -944,11 +1001,12 @@ hook_statfs(const char* path, struct statfs* buf) { LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", __func__, path, fmt::ptr(buf)); - std::string rel_path; - if(CTX->relativize_path(path, rel_path)) { + auto normalized_path = gkfs::path::normalize(path); + if(gkfs::path::is_in_gkfs(normalized_path, true)) return with_errno(gkfs::syscall::gkfs_statfs(buf)); - } - return syscall_no_intercept_wrapper(SYS_statfs, rel_path.c_str(), buf); + else + return static_cast(syscall_no_intercept_wrapper( + SYS_statfs, normalized_path.c_str(), buf)); } int @@ -959,7 +1017,7 @@ hook_fstatfs(unsigned int fd, struct statfs* buf) { if(CTX->file_map()->exist(fd)) { return with_errno(gkfs::syscall::gkfs_statfs(buf)); } - return syscall_no_intercept_wrapper(SYS_fstatfs, fd, buf); + 
return static_cast(syscall_no_intercept_wrapper(SYS_fstatfs, fd, buf)); } /* The function should broadcast a flush message (pmem_persist i.e.) if the @@ -974,7 +1032,7 @@ hook_fsync(unsigned int fd) { return 0; } - return syscall_no_intercept_wrapper(SYS_fsync, fd); + return static_cast(syscall_no_intercept_wrapper(SYS_fsync, fd)); } int @@ -983,11 +1041,12 @@ hook_getxattr(const char* path, const char* name, void* value, size_t size) { LOG(DEBUG, "{}() called with path '{}' name '{}' value '{}' size '{}'", __func__, path, name, fmt::ptr(value), size); - std::string rel_path; - if(CTX->relativize_path(path, rel_path)) { + auto normalized_path = gkfs::path::normalize(path); + if(gkfs::path::is_in_gkfs(normalized_path, true)) { return -ENOTSUP; } - return syscall_no_intercept_wrapper(SYS_getxattr, path, name, value, size); + return static_cast(syscall_no_intercept_wrapper( + SYS_getxattr, normalized_path.c_str(), name, value, size)); } } // namespace gkfs::hook diff --git a/src/client/path.cpp b/src/client/path.cpp index 5eb2f0a31..221e0bdb6 100644 --- a/src/client/path.cpp +++ b/src/client/path.cpp @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include @@ -38,6 +40,7 @@ #include #include #include +#include extern "C" { #include @@ -46,53 +49,64 @@ extern "C" { using namespace std; -namespace gkfs::path { - -static const string excluded_paths[2] = {"sys/", "proc/"}; - -/** Match components in path - * - * Returns the number of consecutive components at start of `path` - * that match the ones in `components` vector. - * - * `path_components` will be set to the total number of components found in - * `path` - * - * Example: - * ```ÏÏ - * unsigned int tot_comp; - * path_match_components("/matched/head/with/tail", &tot_comp, ["matched", - * "head", "no"]) == 2; tot_comp == 4; - * ``` +namespace { +const string excluded_paths[2] = {"sys/", "proc/"}; +/** + * Normalize a given path with `.` and `..` components. 
This will not resolve + * symlinks + * @param path + * @return normalized path */ -unsigned int -match_components(const string& path, unsigned int& path_components, - const ::vector& components) { - unsigned int matched = 0; - unsigned int processed_components = 0; +string +normalize(const string& path) { + + string normalized{}; // final normalized path + normalized.reserve(path.size()); string::size_type comp_size = 0; // size of current component string::size_type start = 0; // start index of curr component string::size_type end = 0; // end index of curr component (last processed // Path Separator "separator") + stack slash_idx{}; + slash_idx.push(0); // index of all slashes in resolved path (used for + // rollback due to `..`) while(++end < path.size()) { start = end; - + // Skip sequence of multiple path-separators. + while(start < path.size() && path[start] == gkfs::path::separator) { + start++; + } // Find next component - end = path.find(path::separator, start); + end = path.find_first_of(gkfs::path::separator, start); if(end == string::npos) { end = path.size(); } - comp_size = end - start; - if(matched == processed_components && - path.compare(start, comp_size, components.at(matched)) == 0) { - ++matched; + + // component is empty (this must be the last component) + if(comp_size == 0) { + break; } - ++processed_components; + // component is '.', we skip it + if(comp_size == 1 && path.at(start) == '.') { + continue; + } + // component is '..' we need to rollback normalized path + if(comp_size == 2 && path.at(start) == '.' 
&& + path.at(start + 1) == '.') { + if(!normalized.empty()) { + normalized.erase(slash_idx.top()); + slash_idx.pop(); + } + continue; + } + // add `/` to the normalized path + normalized.push_back(gkfs::path::separator); + slash_idx.push(normalized.size() - 1); + normalized.append(path, start, comp_size); } - path_components = processed_components; - return matched; + LOG(INFO, "path: '{}', normalized: '{}'", path, normalized); + return normalized; } /** Resolve path to its canonical representation @@ -113,7 +127,7 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { LOG(DEBUG, "path: \"{}\", resolved: \"{}\", resolve_last_link: {}", path, resolved, resolve_last_link); - assert(path::is_absolute(path)); + assert(gkfs::path::is_absolute(path)); for(auto& excl_path : excluded_paths) { if(path.compare(1, excl_path.length(), excl_path) == 0) { @@ -132,8 +146,9 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { string::size_type start = 0; // start index of curr component string::size_type end = 0; // end index of curr component (last processed // Path Separator "separator") - string::size_type last_slash_pos = - 0; // index of last slash in resolved path + stack slash_idx{}; + slash_idx.push(0); // index of all slashes in resolved path (used for + // rollback due to `..`) resolved.clear(); resolved.reserve(path.size()); @@ -141,12 +156,12 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { start = end; /* Skip sequence of multiple path-separators. 
*/ - while(start < path.size() && path[start] == path::separator) { + while(start < path.size() && path[start] == gkfs::path::separator) { ++start; } // Find next component - end = path.find(path::separator, start); + end = path.find_first_of(gkfs::path::separator, start); if(end == string::npos) { end = path.size(); } @@ -164,12 +179,8 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { path.at(start + 1) == '.') { // component is '..' we need to rollback resolved path if(!resolved.empty()) { - resolved.erase(last_slash_pos); - /* TODO Optimization - * the previous slash position should be stored. - * The following search could be avoided. - */ - last_slash_pos = resolved.find_last_of(path::separator); + resolved.erase(slash_idx.top()); + slash_idx.pop(); } if(resolved_components > 0) { if(matched_components == resolved_components) { @@ -181,11 +192,15 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { } // add `/` to the reresolved path - resolved.push_back(path::separator); - last_slash_pos = resolved.size() - 1; + resolved.push_back(gkfs::path::separator); + slash_idx.push(resolved.size() - 1); resolved.append(path, start, comp_size); - - if(matched_components < mnt_components.size()) { + /* + * This will be true for all path components outside of GKFS and up to + * the mountdir's parent path The mountdir directory is considered to be + * inside GKFS as it is entirely virtual and does not need to exist + */ + if(matched_components < mnt_components.size() - 1) { // Outside GekkoFS if(matched_components == resolved_components && path.compare(start, comp_size, @@ -216,10 +231,15 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { } // substituute resolved with new link path resolved = link_resolved.get(); - matched_components = match_components( + matched_components = gkfs::path::match_components( resolved, resolved_components, mnt_components); // set matched counter to value coherent with the 
new path - last_slash_pos = resolved.find_last_of(path::separator); + stack slash_idx_new{}; + for(size_t i = 0; i < resolved.size(); i++) { + if(resolved[i] == gkfs::path::separator) + slash_idx_new.push(i); + } + slash_idx = slash_idx_new; continue; } else if((!S_ISDIR(st.st_mode)) && (end != path.size())) { resolved.append(path, end, string::npos); @@ -239,12 +259,153 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { } if(resolved.empty()) { - resolved.push_back(path::separator); + resolved.push_back(gkfs::path::separator); } LOG(DEBUG, "external: \"{}\"", resolved); return false; } +} // namespace + +namespace gkfs::path { + +/** Match components in path + * + * Returns the number of consecutive components at start of `path` + * that match the ones in `components` vector. + * + * `path_components` will be set to the total number of components found in + * `path` + * + * Example: + * ```ÏÏ + * unsigned int tot_comp; + * path_match_components("/matched/head/with/tail", &tot_comp, ["matched", + * "head", "no"]) == 2; tot_comp == 4; + * ``` + */ +unsigned int +match_components(const string& path, unsigned int& path_components, + const ::vector& components) { + unsigned int matched = 0; + unsigned int processed_components = 0; + string::size_type comp_size = 0; // size of current component + string::size_type start = 0; // start index of curr component + string::size_type end = 0; // end index of curr component (last processed + // Path Separator "separator") + + while(++end < path.size()) { + start = end; + + // Find next component + end = path.find(path::separator, start); + if(end == string::npos) { + end = path.size(); + } + + comp_size = end - start; + if(matched == processed_components && + path.compare(start, comp_size, components.at(matched)) == 0) { + ++matched; + } + ++processed_components; + } + path_components = processed_components; + return matched; +} + +NormalizeStatus +normalize(int dirfd, const char* raw_path, std::string& 
normalized_path, + bool resolve_last_link) { + // TODO when LEAF is available: return concated path and throw Status + // instead. Relativize path should be called only after the library + // constructor has been executed + assert(CTX->interception_enabled()); + // If we run the constructor we also already setup the mountdir + assert(!CTX->mountdir().empty()); + + // We assume raw path is valid + assert(raw_path != nullptr); + + std::string path{}; + + if(raw_path[0] != gkfs::path::separator) { + // path is relative + if(dirfd == AT_FDCWD) { + // path is relative to cwd + path = gkfs::path::prepend_path(CTX->cwd(), raw_path); + } else { + if(!CTX->file_map()->exist(dirfd)) { + return NormalizeStatus::fd_unknown; + } + // path is relative to fd + auto dir = CTX->file_map()->get_dir(dirfd); + if(dir == nullptr) { + return NormalizeStatus::fd_not_a_dir; + } + path = CTX->mountdir(); + path.append(dir->path()); + path.push_back(gkfs::path::separator); + path.append(raw_path); + } + } else { + path = raw_path; + } + normalized_path = ::normalize(path); + return NormalizeStatus::ok; +} + +std::string +normalize(const char* raw_path, bool resolve_last_link) { + // TODO when LEAF is available: return concated path and throw Status + // instead. Relativize path should be called only after the library + // constructor has been executed + assert(CTX->interception_enabled()); + // If we run the constructor we also already setup the mountdir + assert(!CTX->mountdir().empty()); + + // We assume raw path is valid + assert(raw_path != nullptr); + + std::string path; + + if(raw_path[0] != gkfs::path::separator) { + /* Path is not absolute, we need to prepend CWD; + * First reserve enough space to minimize memory copy + */ + path = gkfs::path::prepend_path(CTX->cwd(), raw_path); + } else { + path = raw_path; + } + return ::normalize(path); +} + +/** + * Checks if a path is within gkfs namespace. 
normalize_path() should have been + * called before as the mountpoint in the path is checked from the beginning and + * must therefore be absolute. + * + * cut_mountdir_prefix can be passed to remove the mountdir prefix from path: + * /tmp/mountdir/gkfsfile -> /gkfsfile It modifies the given path instead of + * returning a new string to avoid copying, as this function is performance + * critical + * + * @param path (absolute path) + * @param cut_mountdir_prefix (default false, if true gkfs mountpoint path is + * cut if it is within gkfs namespace) + * @return true if within gkfs namespace else false + */ +bool +is_in_gkfs(std::string& path, bool cut_mountdir_prefix) { + if(path.rfind(CTX->mountdir(), 0) != std::string::npos) { + if(cut_mountdir_prefix) + path.erase(1, CTX->mountdir().size()); + return true; + } else { + return false; + } +} + string get_sys_cwd() { char temp[path::max_length]; diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index f8ac782fe..a42be9aae 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -183,84 +183,74 @@ PreloadContext::auto_sm(bool auto_sm) { PreloadContext::auto_sm_ = auto_sm; } -RelativizeStatus -PreloadContext::relativize_fd_path(int dirfd, const char* raw_path, - std::string& relative_path, int flags, - bool resolve_last_link) const { - - // Relativize path should be called only after the library constructor has - // been executed - assert(interception_enabled_); - // If we run the constructor we also already setup the mountdir - assert(!mountdir_.empty()); - - // We assume raw path is valid - assert(raw_path != nullptr); - - std::string path; - - if(raw_path != nullptr && raw_path[0] != gkfs::path::separator) { - // path is relative - if(dirfd == AT_FDCWD) { - // path is relative to cwd - path = gkfs::path::prepend_path(cwd_, raw_path); - } else { - if(!ofm_->exist(dirfd)) { - return RelativizeStatus::fd_unknown; - } else { - // check if we have the AT_EMPTY_PATH 
flag - // for fstatat. - if(flags & AT_EMPTY_PATH) { - relative_path = ofm_->get(dirfd)->path(); - return RelativizeStatus::internal; - } - } - // path is relative to fd - auto dir = ofm_->get_dir(dirfd); - if(dir == nullptr) { - return RelativizeStatus::fd_not_a_dir; - } - path = mountdir_; - path.append(dir->path()); - path.push_back(gkfs::path::separator); - path.append(raw_path); - } - } else { - path = raw_path; - } - - if(gkfs::path::resolve(path, relative_path, resolve_last_link)) { - return RelativizeStatus::internal; - } - return RelativizeStatus::external; -} -bool -PreloadContext::relativize_path(const char* raw_path, - std::string& relative_path, - bool resolve_last_link) const { - // Relativize path should be called only after the library constructor has - // been executed - assert(interception_enabled_); - // If we run the constructor we also already setup the mountdir - assert(!mountdir_.empty()); - - // We assume raw path is valid - assert(raw_path != nullptr); - - std::string path; - - if(raw_path != nullptr && raw_path[0] != gkfs::path::separator) { - /* Path is not absolute, we need to prepend CWD; - * First reserve enough space to minimize memory copy - */ - path = gkfs::path::prepend_path(cwd_, raw_path); - } else { - path = raw_path; - } - - return gkfs::path::resolve(path, relative_path, resolve_last_link); -} +// RelativizeStatus PreloadContext::relativize_fd_path(int dirfd, +// const char* raw_path, +// std::string& +// relative_path, bool +// resolve_last_link) const { +// +// // Relativize path should be called only after the library constructor has +// been executed assert(interception_enabled_); +// // If we run the constructor we also already setup the mountdir +// assert(!mountdir_.empty()); +// +// // We assume raw path is valid +// assert(raw_path != nullptr); +// +// std::string path; +// +// if (raw_path[0] != gkfs::path::separator) { +// // path is relative +// if (dirfd == AT_FDCWD) { +// // path is relative to cwd +// path = 
gkfs::path::prepend_path(cwd_, raw_path); +// } else { +// if (!ofm_->exist(dirfd)) { +// return RelativizeStatus::fd_unknown; +// } +// // path is relative to fd +// auto dir = ofm_->get_dir(dirfd); +// if (dir == nullptr) { +// return RelativizeStatus::fd_not_a_dir; +// } +// path = mountdir_; +// path.append(dir->path()); +// path.push_back(gkfs::path::separator); +// path.append(raw_path); +// } +// } else { +// path = raw_path; +// } +// +// if (gkfs::path::resolve(path, relative_path, resolve_last_link)) { +// return RelativizeStatus::internal; +// } +// return RelativizeStatus::external; +//} +// +// bool PreloadContext::relativize_path(const char* raw_path, std::string& +// relative_path, bool resolve_last_link) const { +// // Relativize path should be called only after the library constructor has +// been executed assert(interception_enabled_); +// // If we run the constructor we also already setup the mountdir +// assert(!mountdir_.empty()); +// +// // We assume raw path is valid +// assert(raw_path != nullptr); +// +// std::string path; +// +// if (raw_path[0] != gkfs::path::separator) { +// /* Path is not absolute, we need to prepend CWD; +// * First reserve enough space to minimize memory copy +// */ +// path = gkfs::path::prepend_path(cwd_, raw_path); +// } else { +// path = raw_path; +// } +// return gkfs::path::resolve(path, relative_path, resolve_last_link); +//} const std::shared_ptr& PreloadContext::file_map() const { diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index cae92f201..2c0974ac7 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -392,9 +392,6 @@ agios_initialize() { */ void destroy_enviroment() { - GKFS_DATA->spdlogger()->debug("{}() Removing mount directory", __func__); - std::error_code ecode; - fs::remove_all(GKFS_DATA->mountdir(), ecode); GKFS_DATA->spdlogger()->debug("{}() Freeing I/O executions streams", __func__); for(unsigned int i = 0; i < RPC_DATA->io_streams().size(); i++) { @@ -426,8 +423,8 @@ 
destroy_enviroment() { if(!keep_rootdir) { GKFS_DATA->spdlogger()->info("{}() Removing rootdir and metadir ...", __func__); - fs::remove_all(GKFS_DATA->metadir(), ecode); - fs::remove_all(GKFS_DATA->rootdir(), ecode); + fs::remove_all(GKFS_DATA->metadir()); + fs::remove_all(GKFS_DATA->rootdir()); } GKFS_DATA->close_stats(); } @@ -543,11 +540,22 @@ parse_input(const cli_options& opts, const CLI::App& desc) { GKFS_DATA->hosts_file(hosts_file); assert(desc.count("--mountdir")); - auto mountdir = opts.mountdir; - // Create mountdir. We use this dir to get some information on the - // underlying fs with statfs in gkfs_statfs - fs::create_directories(mountdir); - GKFS_DATA->mountdir(fs::canonical(mountdir).native()); + // Store mountdir and ensure parent dir exists as it is required for path + // resolution on the client + try { + fs::path mountdir = opts.mountdir; + auto mountdir_parent = fs::canonical(mountdir.parent_path()); + GKFS_DATA->mountdir(fmt::format("{}/{}", mountdir_parent.native(), + mountdir.filename().native())); + GKFS_DATA->spdlogger()->info("{}() Mountdir '{}'", __func__, + GKFS_DATA->mountdir()); + } catch(const std::exception& e) { + auto emsg = fmt::format( + "Parent directory for given mountdir does not exist. 
err '{}' Exiting ...", + e.what()); + cerr << emsg << endl; + exit(EXIT_FAILURE); + } assert(desc.count("--rootdir")); auto rootdir = opts.rootdir; -- GitLab From 52fea5865421e36c7c6e187ad195b9d6550a99df Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 1 Feb 2024 13:42:26 +0100 Subject: [PATCH 2/3] fix --- src/client/hooks.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 93864a1e0..05ee507ed 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -205,13 +205,9 @@ hook_fstatat(int dirfd, const char* cpath, struct stat* buf, int flags) { LOG(DEBUG, "{}() called with path: \"{}\", fd: {}, buf: {}, flags: {}", __func__, cpath, dirfd, fmt::ptr(buf), flags); - if(flags & AT_EMPTY_PATH) { - LOG(ERROR, "{}() AT_EMPTY_PATH flag not supported", __func__); - return -ENOTSUP; - } - std::string normalized_path{}; auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); + LOG(INFO, "{}() normalized path: \"{}\"", __func__, normalized_path); switch(status) { case gkfs::path::NormalizeStatus::fd_unknown: -- GitLab From 87afff64dde98d3ed40a7675b10225b198c83670 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Tue, 5 Mar 2024 13:42:52 +0100 Subject: [PATCH 3/3] update --- include/client/path.hpp | 8 +++++--- src/client/hooks.cpp | 13 ++++++------- src/client/path.cpp | 29 +++++++++++++++++++---------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/include/client/path.hpp b/include/client/path.hpp index 528f51fb4..6a96eb8a6 100644 --- a/include/client/path.hpp +++ b/include/client/path.hpp @@ -32,7 +32,7 @@ namespace gkfs::path { -enum class NormalizeStatus { ok, fd_unknown, fd_not_a_dir }; +enum class NormalizeStatus { internal, external, fd_unknown, fd_not_a_dir }; unsigned int match_components(const std::string& path, unsigned int& path_components, @@ -44,13 +44,15 @@ resolve(const std::string& path, std::string& resolved, NormalizeStatus normalize(int dirfd, const char* 
raw_path, std::string& normalized_path, - bool resolve_last_link = true); + int flags = 0, bool resolve_last_link = true); std::string normalize(const char* raw_path, bool resolve_last_link = true); bool -is_in_gkfs(std::string& path, bool cut_mountdir_prefix = false); +is_in_gkfs(std::string& path); + +std::string make_internal(std::string path); std::string get_sys_cwd(); diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 05ee507ed..467ff2fae 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -77,12 +77,11 @@ hook_openat(int dirfd, const char* cpath, int flags, mode_t mode) { case gkfs::path::NormalizeStatus::fd_not_a_dir: return -ENOTDIR; - case gkfs::path::NormalizeStatus::ok: - if(gkfs::path::is_in_gkfs(normalized_path, true)) - return with_errno( - gkfs::syscall::gkfs_open(normalized_path, mode, flags)); - else - return syscall_no_intercept_wrapper(SYS_openat, dirfd, + case gkfs::path::NormalizeStatus::internal: + gkfs::path::make_internal(normalized_path); + return with_errno(gkfs::syscall::gkfs_open(normalized_path, mode, flags)); + case gkfs::path::NormalizeStatus::external: + return syscall_no_intercept_wrapper(SYS_openat, dirfd, normalized_path.c_str(), flags, mode); default: @@ -206,7 +205,7 @@ hook_fstatat(int dirfd, const char* cpath, struct stat* buf, int flags) { __func__, cpath, dirfd, fmt::ptr(buf), flags); std::string normalized_path{}; - auto status = gkfs::path::normalize(dirfd, cpath, normalized_path); + auto status = gkfs::path::normalize(dirfd, cpath, normalized_path, flags); LOG(INFO, "{}() normalized path: \"{}\"", __func__, normalized_path); switch(status) { diff --git a/src/client/path.cpp b/src/client/path.cpp index 221e0bdb6..f0ff6db23 100644 --- a/src/client/path.cpp +++ b/src/client/path.cpp @@ -315,7 +315,7 @@ match_components(const string& path, unsigned int& path_components, } NormalizeStatus -normalize(int dirfd, const char* raw_path, std::string& normalized_path, +normalize(int dirfd, const char* 
raw_path, std::string& normalized_path, int flags, bool resolve_last_link) { // TODO when LEAF is available: return concated path and throw Status // instead. Relativize path should be called only after the library @@ -337,6 +337,13 @@ normalize(int dirfd, const char* raw_path, std::string& normalized_path, } else { if(!CTX->file_map()->exist(dirfd)) { return NormalizeStatus::fd_unknown; + } else { + // check if we have the AT_EMPTY_PATH flag + // for fstatat. + if(flags & AT_EMPTY_PATH) { + normalized_path = CTX->file_map()->get(dirfd)->path(); + return NormalizeStatus::internal; + } } // path is relative to fd auto dir = CTX->file_map()->get_dir(dirfd); @@ -351,8 +358,12 @@ normalize(int dirfd, const char* raw_path, std::string& normalized_path, } else { path = raw_path; } + normalized_path = ::normalize(path); - return NormalizeStatus::ok; + if(is_in_gkfs(normalized_path)) { + return NormalizeStatus::internal; + } else + return NormalizeStatus::external; } std::string @@ -396,14 +407,12 @@ normalize(const char* raw_path, bool resolve_last_link) { * @return true if within gkfs namespace else false */ bool -is_in_gkfs(std::string& path, bool cut_mountdir_prefix) { - if(path.rfind(CTX->mountdir(), 0) != std::string::npos) { - if(cut_mountdir_prefix) - path.erase(1, CTX->mountdir().size()); - return true; - } else { - return false; - } +is_in_gkfs(std::string& path) { + return path.rfind(CTX->mountdir(), 0) != std::string::npos; +} + +inline void make_internal(string& path) { + path.erase(1, CTX->mountdir().size()); } string -- GitLab