From ded36979178a2e1975db13e684ad11cb03d25466 Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Mon, 12 May 2025 14:59:48 +0200 Subject: [PATCH] Symlink and rename support, compss --- CHANGELOG.md | 1 + include/client/gkfs_functions.hpp | 3 +- include/client/path.hpp | 2 + include/client/rpc/rpc_types.hpp | 122 +++++++++++ include/client/syscalls/args.hpp | 1 + include/client/user_functions.hpp | 16 +- include/common/common_defs.hpp | 4 + include/common/rpc/rpc_types.hpp | 6 + include/daemon/handler/rpc_defs.hpp | 5 + src/client/gkfs_functions.cpp | 288 +++++++++++++++---------- src/client/hooks.cpp | 43 +++- src/client/path.cpp | 19 +- src/client/preload.cpp | 14 +- src/client/rpc/forward_metadata.cpp | 36 +++- src/client/rpc/rpc_types.cpp | 3 + src/daemon/daemon.cpp | 9 +- src/daemon/handler/srv_metadata.cpp | 87 ++++++-- src/proxy/util.cpp | 4 +- test/symlink_test.cpp | 6 +- test/wr_test.cpp | 2 +- tests/integration/shell/test_concat.py | 2 +- 21 files changed, 509 insertions(+), 164 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37f104261..c733dd493 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - Dup3 is supported if O_CLOEXEC is not used (i.e. hexdump) ([!228](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/228)) - gkfs_do_write uses int instead of ssize_t causing overflow ([!229](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/229)) - proxy remove metadata has inverted return values ([!237](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/237)) + - Rename and symlink support leveraged ([!246](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_requests/246)) ## [0.9.4] - 2025-03 ### New diff --git a/include/client/gkfs_functions.hpp b/include/client/gkfs_functions.hpp index 142f7a0d3..73a89ba60 100644 --- a/include/client/gkfs_functions.hpp +++ b/include/client/gkfs_functions.hpp @@ -67,7 +67,8 @@ gkfs_access(const std::string& path, int mask, bool follow_links = true); // Implementation of stat, // Follow links is true by default int -gkfs_stat(const std::string& path, struct stat* buf, bool follow_links = true); +gkfs_stat(const std::string& path, struct stat* buf, bool follow_links = true, + bool bypass_rename = false); // Implementation of statx, it uses the normal stat and maps the information to // the statx structure Follow links is true by default diff --git a/include/client/path.hpp b/include/client/path.hpp index 91cf13a2a..87acc327a 100644 --- a/include/client/path.hpp +++ b/include/client/path.hpp @@ -54,10 +54,12 @@ resolve(const std::string& path, bool resolve_last_link = true); std::pair resolve_new(const std::string& path, bool resolve_last_link = true); +#ifdef GKFS_USE_LEGACY_PATH_RESOLVE [[deprecated( "Use GKFS_USE_LEGACY_PATH_RESOLVE to use old implementation")]] bool resolve(const std::string& path, std::string& resolved, bool resolve_last_link = true); +#endif std::string get_sys_cwd(); diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index c9a2a4ed4..d093f4455 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -1343,6 +1343,128 @@ struct mk_symlink { #endif // HAS_SYMLINKS + +#ifdef HAS_RENAME + +//============================================================================== +// definitions for rename +struct rename { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = rename; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_rename_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 40; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::rename; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_rename_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, const std::string& target_path) + : m_path(path), m_target_path(target_path) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + std::string + target_path() const { + return m_target_path; + } + + explicit input(const rpc_rename_in_t& other) + : m_path(other.path), m_target_path(other.target_path) {} + + explicit + operator rpc_rename_in_t() { + return {m_path.c_str(), m_target_path.c_str()}; + } + + private: + std::string m_path; + std::string m_target_path; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err() {} + + output(int32_t err) : m_err(err) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +#endif // HAS_RENAME + //============================================================================== // definitions for remove data struct remove_data { diff --git a/include/client/syscalls/args.hpp b/include/client/syscalls/args.hpp index d53a74d0d..ea391ba97 100644 --- a/include/client/syscalls/args.hpp +++ b/include/client/syscalls/args.hpp @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/include/client/user_functions.hpp b/include/client/user_functions.hpp index 7825a64c6..d861c22e2 100644 --- a/include/client/user_functions.hpp +++ b/include/client/user_functions.hpp @@ -84,7 +84,8 @@ ssize_t gkfs_pread(int fd, void* buf, size_t count, off64_t offset); int -gkfs_stat(const std::string& path, struct stat* buf, bool follow_links = true); +gkfs_stat(const std::string& path, struct stat* buf, bool follow_links = true, + bool bypass_rename = false); int gkfs_statx(int dirfs, const std::string& path, int flags, unsigned int mask, @@ -118,6 +119,19 @@ gkfs_getdents64(unsigned int fd, struct linux_dirent64* dirp, int gkfs_truncate(const std::string& path, off_t offset); +#ifdef HAS_SYMLINKS +int +gkfs_mk_symlink(const std::string& path, const std::string& target); +int +gkfs_readlink(const std::string& path, char* buf, int bufsize); +#endif +#ifdef HAS_RENAME +int +gkfs_rename(const std::string& old_path, const std::string& new_path); +#endif + +int +gkfs_fsync(unsigned int fd); } // namespace syscall namespace malleable { diff --git a/include/common/common_defs.hpp b/include/common/common_defs.hpp index 0f7e1ee75..70e50ca3a 100644 --- a/include/common/common_defs.hpp +++ b/include/common/common_defs.hpp @@ -71,6 +71,10 @@ constexpr auto get_dirents_extended = "rpc_srv_get_dirents_extended"; #ifdef HAS_SYMLINKS constexpr auto mk_symlink = "rpc_srv_mk_symlink"; #endif +#ifdef HAS_RENAME +constexpr auto rename = "rpc_srv_rename"; +#endif + constexpr auto write = "rpc_srv_write_data"; constexpr auto read = "rpc_srv_read_data"; constexpr auto truncate = "rpc_srv_trunc_data"; diff --git a/include/common/rpc/rpc_types.hpp b/include/common/rpc/rpc_types.hpp index d19269533..dd1f6f091 100644 --- a/include/common/rpc/rpc_types.hpp +++ b/include/common/rpc/rpc_types.hpp @@ -94,6 +94,12 @@ MERCURY_GEN_PROC(rpc_mk_symlink_in_t, ((hg_const_string_t) (path))(( hg_const_string_t) (target_path))) #endif +#ifdef HAS_RENAME +MERCURY_GEN_PROC(rpc_rename_in_t, ((hg_const_string_t) (path))( + (hg_const_string_t) (target_path))) + +#endif + // data MERCURY_GEN_PROC( diff --git a/include/daemon/handler/rpc_defs.hpp b/include/daemon/handler/rpc_defs.hpp index 8b99ef120..44d35fcf9 100644 --- a/include/daemon/handler/rpc_defs.hpp +++ b/include/daemon/handler/rpc_defs.hpp @@ -73,6 +73,11 @@ DECLARE_MARGO_RPC_HANDLER(rpc_srv_mk_symlink) #endif +#ifdef HAS_RENAME +DECLARE_MARGO_RPC_HANDLER(rpc_srv_rename) +#endif + + // data DECLARE_MARGO_RPC_HANDLER(rpc_srv_remove_data) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 1b338fc0a..06ecedb27 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -300,7 +300,8 @@ gkfs_open(const std::string& path, mode_t mode, int flags) { // get renamed path from target and retrieve metadata from it auto md_ = gkfs::utils::get_metadata(md.target_path()); new_path = md.target_path(); - while(!md_.value().target_path().empty()) { + while(!md_.value().target_path().empty() and + md_.value().blocks() != -1) { new_path = md_.value().target_path(); md_ = gkfs::utils::get_metadata(md_.value().target_path(), false); @@ -322,7 +323,7 @@ gkfs_open(const std::string& path, mode_t mode, int flags) { return -1; } } - + // RENAMED OR SYMLINK NOT PROTECTED return CTX->file_map()->add( std::make_shared(new_path, flags)); } @@ -342,7 +343,6 @@ gkfs_open(const std::string& path, mode_t mode, int flags) { return -1; } } - auto fd = CTX->file_map()->add( std::make_shared(path, flags)); @@ -372,6 +372,9 @@ gkfs_create(const std::string& path, mode_t mode) { case 0: mode |= S_IFREG; break; +#ifdef HAS_SYMLINKS + case S_IFLNK: +#endif case S_IFREG: // intentionally fall-through case S_IFDIR: break; @@ -426,7 +429,7 @@ gkfs_create(const std::string& path, mode_t mode) { */ int gkfs_remove(const std::string& path) { -#ifdef HAS_SYMLINKS + #ifdef HAS_RENAME auto md = gkfs::utils::get_metadata(path); if(!md) { @@ -438,30 +441,35 @@ gkfs_remove(const std::string& path) { errno = EISDIR; return -1; } + if(md.value().blocks() == -1) { errno = ENOENT; return -1; } else { - if(!md.value().target_path().empty()) { - auto md_ = gkfs::utils::get_metadata(md.value().target_path()); - std::string new_path = md.value().target_path(); - while(!md.value().target_path().empty()) { - new_path = md.value().target_path(); - md = gkfs::utils::get_metadata(md.value().target_path(), false); - if(!md) { + if(!md->is_link()) { + if(!md.value().target_path().empty()) { + auto md_ = gkfs::utils::get_metadata(md.value().target_path()); + std::string new_path = md.value().target_path(); + while(!md.value().target_path().empty() and + md.value().blocks() != -1) { + new_path = md.value().target_path(); + md = gkfs::utils::get_metadata(md.value().target_path(), + false); + if(!md) { + return -1; + } + } + auto err = gkfs::rpc::forward_remove(new_path, false, + CTX->get_replicas()); + if(err) { + errno = err; return -1; } } - auto err = gkfs::rpc::forward_remove(new_path, false, - CTX->get_replicas()); - if(err) { - errno = err; - return -1; - } } } #endif // HAS_RENAME -#endif // HAS_SYMLINKS + int err = 0; if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { err = gkfs::rpc::forward_remove_proxy(path, false); @@ -490,7 +498,7 @@ gkfs_access(const std::string& path, const int mask, bool follow_links) { LOG(DEBUG, "File does not exist '{}'", path); return -1; } -#ifdef HAS_SYMLINKS + #ifdef HAS_RENAME LOG(DEBUG, "Checking for renamed file '{}'", path); if(md.value().blocks() == -1) { @@ -499,7 +507,8 @@ gkfs_access(const std::string& path, const int mask, bool follow_links) { return -1; } else { - while(!md.value().target_path().empty()) { + + while(!md.value().target_path().empty() and md.value().blocks() != -1) { LOG(DEBUG, "File exist but it is renamed '{} -> {}'", path, md.value().target_path()); md = gkfs::utils::get_metadata(md.value().target_path(), false); @@ -511,11 +520,10 @@ gkfs_access(const std::string& path, const int mask, bool follow_links) { } } #endif // HAS_RENAME -#endif // HAS_SYMLINKS return 0; } -#ifdef HAS_SYMLINKS + #ifdef HAS_RENAME /** * gkfs wrapper for rename() system calls @@ -531,36 +539,27 @@ gkfs_access(const std::string& path, const int mask, bool follow_links) { int gkfs_rename(const string& old_path, const string& new_path) { auto md_old = gkfs::utils::get_metadata(old_path, false); - + std::string original_path = old_path; // if the file is not found, or it is a renamed one cancel. if(!md_old || md_old.value().blocks() == -1) { return -1; } + + auto md_new = gkfs::utils::get_metadata(new_path, false); if(md_new) { // the new file exists... check for circular... if(md_new.value().blocks() == -1 && md_old.value().target_path() == new_path) { - // the new file is a renamed file, so we need to get the metadata of - // the original file. + // the new file is a renamed file, so we need to get the + // metadata of the original file. LOG(DEBUG, "Destroying Circular Rename '{}' --> '{}'", old_path, new_path); - gkfs::metadata::MetadentryUpdateFlags flags{}; - flags.atime = false; - flags.mtime = false; - flags.ctime = false; - flags.blocks = true; - flags.mode = false; - flags.size = false; - flags.uid = false; - flags.gid = false; - flags.link_count = false; + md_old.value().blocks(0); md_old.value().target_path(""); - - auto err = gkfs::rpc::forward_update_metadentry( - new_path, md_old.value(), flags, 0); - + // We update the target_path + auto err = gkfs::rpc::forward_rename(new_path, "", md_old.value()); if(err) { errno = err; return -1; @@ -578,18 +577,47 @@ gkfs_rename(const string& old_path, const string& new_path) { return 0; } return -1; - } + } else { - auto err = gkfs::rpc::forward_rename(old_path, new_path, md_old.value()); - if(err) { - errno = err; - return -1; - } + if(!md_old.value().target_path().empty()) { + + + // the file is a renamed one, we need to get the metadata of the + // original file. (There will be only one level) + + original_path = md_old.value().target_path(); + if(!S_ISLNK(md_old->mode())) { + md_old = gkfs::utils::get_metadata(original_path, false); + + if(!md_old) { + return -1; + } + } + + auto is_dir = false; + if(S_ISDIR(md_old->mode())) + is_dir = true; + // Remove intermediate file + gkfs::rpc::forward_remove(old_path, is_dir, CTX->get_replicas()); + } + int err = 0; + if(!S_ISLNK(md_old->mode())) { + err = gkfs::rpc::forward_rename(original_path, new_path, + md_old.value()); + } else { + // Was a link so do a forward symlink to regenerate it + err = gkfs_mk_symlink(new_path, original_path); + } + if(err) { + errno = err; + return -1; + } + } return 0; } #endif -#endif + /** * gkfs wrapper for stat() system calls @@ -600,27 +628,42 @@ gkfs_rename(const string& old_path, const string& new_path) { * @return 0 on success, -1 on failure */ int -gkfs_stat(const string& path, struct stat* buf, bool follow_links) { + +gkfs_stat(const string& path, struct stat* buf, bool follow_links, + bool bypass_rename) { auto md = gkfs::utils::get_metadata(path, follow_links); if(!md) { return -1; } -#ifdef HAS_SYMLINKS + + std::string new_path = path; #ifdef HAS_RENAME - if(md.value().blocks() == -1) { - errno = ENOENT; - return -1; - } else { - while(!md.value().target_path().empty()) { - md = gkfs::utils::get_metadata(md.value().target_path(), false); - if(!md) { + if(md->is_link() == false) { + if(md.value().blocks() == -1) { + // This may not be correct in the case of fstat, + // then we will check bypass_rename + if(!bypass_rename) { + errno = ENOENT; return -1; } + } else { + while(!md.value().target_path().empty() and + md.value().blocks() != -1) { + new_path = md.value().target_path(); + md = gkfs::utils::get_metadata(md.value().target_path(), false); + + if(!md) { + return -1; + } + } + if(md.value().blocks() == -1) + md.value().blocks(md->size() / 4096); } } #endif -#endif - gkfs::utils::metadata_to_stat(path, *md, *buf); + // Stat should use new_path in order that the inode of a renamed file is + // equal to the original + gkfs::utils::metadata_to_stat(new_path, *md, *buf); return 0; } @@ -645,21 +688,27 @@ gkfs_statx(int dirfs, const std::string& path, int flags, unsigned int mask, if(!md) { return -1; } -#ifdef HAS_SYMLINKS + #ifdef HAS_RENAME - if(md.value().blocks() == -1) { - errno = ENOENT; - return -1; - } else { - while(!md.value().target_path().empty()) { - md = gkfs::utils::get_metadata(md.value().target_path(), false); - if(!md) { - return -1; + if(md->is_link() == false) { + if(md.value().blocks() == -1) { + errno = ENOENT; + return -1; + } else { + while(!md.value().target_path().empty() and + md.value().blocks() != -1) { + md = gkfs::utils::get_metadata(md.value().target_path(), false); + + if(!md) { + return -1; + } } + if(md.value().blocks() == -1) + md.value().blocks(md->size() / 4096); } } #endif -#endif + struct stat tmp{}; gkfs::utils::metadata_to_stat(path, *md, tmp); @@ -778,8 +827,8 @@ gkfs_lseek(unsigned int fd, off_t offset, unsigned int whence) { } /** - * gkfs wrapper for lseek() system calls with available shared ptr to gkfs - * FileMap errno may be set + * gkfs wrapper for lseek() system calls with available shared ptr to + * gkfs FileMap errno may be set * @param gkfs_fd * @param offset * @param whence @@ -900,12 +949,12 @@ gkfs_truncate(const std::string& path, off_t old_size, off_t new_size) { int gkfs_truncate(const std::string& path, off_t length) { /* TODO CONCURRENCY: - * At the moment we first ask the length to the metadata-server in order to - * know which data-server have data to be deleted. + * At the moment we first ask the length to the metadata-server in + * order to know which data-server have data to be deleted. * - * From the moment we issue the gkfs_stat and the moment we issue the - * gkfs_trunc_data, some more data could have been added to the file and the - * length increased. + * From the moment we issue the gkfs_stat and the moment we issue + * the gkfs_trunc_data, some more data could have been added to the + * file and the length increased. */ if(length < 0) { LOG(DEBUG, "Length is negative: {}", length); @@ -919,14 +968,14 @@ gkfs_truncate(const std::string& path, off_t length) { } // If rename is enabled we need to check if the file is renamed -#ifdef HAS_SYMLINKS + #ifdef HAS_RENAME if(md.value().blocks() == -1) { errno = ENOENT; return -1; } else if(!md.value().target_path().empty()) { std::string new_path; - while(!md.value().target_path().empty()) { + while(!md.value().target_path().empty() and md.value().blocks() != -1) { new_path = md.value().target_path(); md = gkfs::utils::get_metadata(md.value().target_path()); } @@ -940,7 +989,7 @@ gkfs_truncate(const std::string& path, off_t length) { } return gkfs_truncate(new_path, size, length); } -#endif + #endif auto size = md->size(); @@ -952,7 +1001,7 @@ gkfs_truncate(const std::string& path, off_t length) { errno = EINVAL; return -1; } - gkfs_lseek(output_fd, 0, SEEK_END); + gkfs_lseek(output_fd, (off64_t) 0, SEEK_END); ssize_t n = static_cast(length) - size; // Zeroes the buffer. All make_* are value initialized auto buf = std::make_unique(n); @@ -1000,8 +1049,8 @@ gkfs_dup2(const int oldfd, const int newfd) { * @param buf * @param count * @param offset - * @param update_pos pos should only be updated for some write operations (see - * man 2 pwrite) + * @param update_pos pos should only be updated for some write + * operations (see man 2 pwrite) * @return written size or -1 on error */ ssize_t @@ -1045,9 +1094,9 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count, } if(is_append) { // When append is set the EOF is set to the offset - // forward_update_metadentry_size returns. This is because it is an - // atomic operation on the server and reserves the space for this - // append + // forward_update_metadentry_size returns. This is because + // it is an atomic operation on the server and reserves the + // space for this append if(ret_offset.second == -1) { LOG(ERROR, "update_metadentry_size() received -1 as starting offset. " @@ -1106,8 +1155,8 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count, * @param buf * @param count * @param offset - * @param update_pos pos should only be updated for some write operations (see - * man 2 pwrite) + * @param update_pos pos should only be updated for some write + * operations (see man 2 pwrite) * @return written size or -1 on error */ ssize_t @@ -1244,8 +1293,8 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count, return -1; } - // Zeroing buffer before read is only relevant for sparse files. Otherwise - // sparse regions contain invalid data. + // Zeroing buffer before read is only relevant for sparse files. + // Otherwise sparse regions contain invalid data. if constexpr(gkfs::config::io::zero_buffer_before_read) { memset(buf, 0, sizeof(char) * count); } @@ -1427,8 +1476,8 @@ gkfs_opendir(const std::string& path) { return -1; } pair> ret{}; - // Use cache: Get all entries from all servers for the basic metadata - // this is used in get_metadata() later to avoid stat RPCs + // Use cache: Get all entries from all servers for the basic + // metadata this is used in get_metadata() later to avoid stat RPCs if(CTX->use_dentry_cache()) { ret.second = make_shared(path); std::vectoris_internal_fd(fd)) { - // the client application (for some reason) is trying to close an - // internal fd: ignore it + // the client application (for some reason) is trying to close + // an internal fd: ignore it LOG(ERROR, "{}() closing an internal fd '{}'", __func__, fd); errno = EACCES; return -1; @@ -1758,7 +1811,6 @@ gkfs_close(unsigned int fd) { } #ifdef HAS_SYMLINKS -#ifdef GKFS_ENABLE_UNUSED_FUNCTIONS /** * gkfs wrapper for make symlink() system calls * errno may be set @@ -1791,13 +1843,19 @@ gkfs_mk_symlink(const std::string& path, const std::string& target_path) { return -1; } + // Path should exists auto link_md = gkfs::utils::get_metadata(path, false); if(link_md) { LOG(DEBUG, "Link exists: '{}'", path); errno = EEXIST; return -1; } - + LOG(DEBUG, "Create file: {}", path); + // create target_path file (we create it regular) + auto create = gkfs_create(path, 0); + if(create) { + return -1; + } auto err = gkfs::rpc::forward_mk_symlink(path, target_path); if(err) { errno = err; @@ -1842,7 +1900,6 @@ gkfs_readlink(const std::string& path, char* buf, int bufsize) { return path_size; } #endif -#endif std::vector @@ -1876,8 +1933,9 @@ gkfs_get_file_list(const std::string& path) { } // namespace gkfs::syscall -/* This function defines an extension of the dirents prepared to do a find-like - * function The function only sends the request to the specified server +/* This function defines an extension of the dirents prepared to do a + * find-like function The function only sends the request to the specified + * server */ extern "C" int gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, @@ -1906,8 +1964,8 @@ gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, auto de = open_dir[pos]; /* * Calculate the total dentry size within the 'dirent_extended` - * depending on the file name size. The size is then aligned to the size - * of `long` boundary. + * depending on the file name size. The size is then aligned to the + * size of `long` boundary. */ auto total_size = ALIGN(offsetof(struct dirent_extended, d_name) + (get<0>(de)).size() + 1, diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index bd3aaa3c2..7f5befa06 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -134,7 +134,7 @@ hook_stat(const char* path, struct stat* buf) { int hook_statx(int dirfd, const char* path, int flags, unsigned int mask, struct ::statx* buf) { - + bool follow = (flags & AT_SYMLINK_NOFOLLOW) == 0; LOG(DEBUG, "{}() called with dirfd: '{}', path: \"{}\", flags: '{}', mask: '{}', buf: '{}'", __func__, dirfd, path, flags, mask, fmt::ptr(buf)); @@ -154,9 +154,8 @@ hook_statx(int dirfd, const char* path, int flags, unsigned int mask, return -ENOTDIR; case gkfs::preload::RelativizeStatus::internal: - return with_errno(gkfs::syscall::gkfs_statx(dirfd, resolved.c_str(), - flags, mask, buf)); - + return with_errno(gkfs::syscall::gkfs_statx( + dirfd, resolved.c_str(), flags, mask, buf, follow)); default: LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; @@ -174,7 +173,7 @@ hook_lstat(const char* path, struct stat* buf) { std::string rel_path; if(CTX->relativize_path(path, rel_path)) { - return with_errno(gkfs::syscall::gkfs_stat(rel_path, buf)); + return with_errno(gkfs::syscall::gkfs_stat(rel_path, buf, false)); } return gsl::narrow_cast( syscall_no_intercept_wrapper(SYS_lstat, rel_path.c_str(), buf)); @@ -193,7 +192,7 @@ hook_fstat(unsigned int fd, struct stat* buf) { // We can change file_map and recall auto md = gkfs::utils::get_metadata(path, false); if(md.has_value() && md.value().blocks() == -1) { - path = md.value().rename_path(); + path = md.value().target_path(); } #endif return with_errno(gkfs::syscall::gkfs_stat(path, buf)); @@ -207,8 +206,9 @@ hook_fstatat(int dirfd, const char* cpath, struct stat* buf, int flags) { LOG(DEBUG, "{}() called with path: \"{}\", fd: {}, buf: {}, flags: {}", __func__, cpath, dirfd, fmt::ptr(buf), flags); - + bool follow = (flags & AT_SYMLINK_NOFOLLOW) == 0; std::string resolved; + auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved, flags); switch(rstatus) { case gkfs::preload::RelativizeStatus::fd_unknown: @@ -223,7 +223,7 @@ hook_fstatat(int dirfd, const char* cpath, struct stat* buf, int flags) { return -ENOTDIR; case gkfs::preload::RelativizeStatus::internal: - return with_errno(gkfs::syscall::gkfs_stat(resolved, buf)); + return with_errno(gkfs::syscall::gkfs_stat(resolved, buf, follow)); default: LOG(ERROR, "{}() relativize status unknown: {}", __func__); @@ -359,7 +359,7 @@ hook_unlinkat(int dirfd, const char* cpath, int flags) { case gkfs::preload::RelativizeStatus::external: return gsl::narrow_cast(syscall_no_intercept_wrapper( - SYS_unlinkat, dirfd, resolved.c_str(), flags)); + SYS_unlinkat, dirfd, cpath, flags)); case gkfs::preload::RelativizeStatus::fd_not_a_dir: return -ENOTDIR; @@ -379,14 +379,20 @@ hook_unlinkat(int dirfd, const char* cpath, int flags) { int hook_symlinkat(const char* oldname, int newdfd, const char* newname) { - LOG(DEBUG, "{}() called with oldname: \"{}\", newfd: {}, newname: \"{}\"", __func__, oldname, newdfd, newname); +#ifdef HAS_SYMLINKS + bool internal1 = false; +#endif std::string oldname_resolved; if(CTX->relativize_path(oldname, oldname_resolved)) { +#ifdef HAS_SYMLINKS + internal1 = true; +#else LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; +#endif } std::string newname_resolved; @@ -405,8 +411,17 @@ hook_symlinkat(const char* oldname, int newdfd, const char* newname) { return -ENOTDIR; case gkfs::preload::RelativizeStatus::internal: +#ifdef HAS_SYMLINKS + if(internal1) { // Parameters are inverted + return with_errno(gkfs::syscall::gkfs_mk_symlink( + newname_resolved, oldname_resolved)); + } + LOG(WARNING, "{}() operation not supported", __func__); + return -ENOTSUP; +#else LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; +#endif default: LOG(ERROR, "{}() relativize status unknown", __func__); @@ -704,7 +719,8 @@ hook_chdir(const char* path) { // path falls in our namespace auto md = gkfs::utils::get_metadata(rel_path); if(!md) { - LOG(ERROR, "{}() path {} errno {}", __func__, path, errno); + LOG(ERROR, "{}() path {} / {} errno {}", __func__, path, rel_path, + errno); return -errno; } @@ -802,6 +818,11 @@ hook_readlinkat(int dirfd, const char* cpath, char* buf, int bufsiz) { return -ENOTDIR; case gkfs::preload::RelativizeStatus::internal: +#ifdef HAS_SYMLINKS + return with_errno( + gkfs::syscall::gkfs_readlink(resolved, buf, bufsiz)); + +#endif return -EINVAL; default: diff --git a/src/client/path.cpp b/src/client/path.cpp index bd4ca2127..24b2319a3 100644 --- a/src/client/path.cpp +++ b/src/client/path.cpp @@ -58,6 +58,7 @@ extern "C" { #include +#include } using namespace std; @@ -81,6 +82,7 @@ static const string excluded_paths[2] = {"sys/", "proc/"}; * "head", "no"]) == 2; tot_comp == 4; * ``` */ +#ifdef GKFS_USE_LEGACY_PATH_RESOLVE unsigned int match_components(const string& path, unsigned int& path_components, const ::vector& components) { @@ -110,17 +112,27 @@ match_components(const string& path, unsigned int& path_components, path_components = processed_components; return matched; } +#endif + +static char* (*real_realpath)(const char* path, char* resolved_path) = nullptr; string follow_symlinks(const string& path) { struct stat st{}; - if(lstat(path.c_str(), &st) < 0) { + auto res = syscall_no_intercept(SYS_lstat, path.c_str(), &st); + if(res < 0) { LOG(DEBUG, "path \"{}\" does not exist", path); return path; } if(S_ISLNK(st.st_mode)) { auto link_resolved = ::unique_ptr(new char[PATH_MAX]); - if(realpath(path.c_str(), link_resolved.get()) == nullptr) { + if(real_realpath == nullptr) { + real_realpath = reinterpret_cast( + dlsym(((void*) -1l), "realpath")); + } + + if(real_realpath(path.c_str(), link_resolved.get()) == nullptr) { LOG(ERROR, "Failed to get realpath for link \"{}\". " @@ -224,6 +236,8 @@ resolve_new(const string& path, bool resolve_last_link) { * returns true if the resolved path fall inside GekkoFS namespace, * and false otherwise. */ + +#ifdef GKFS_USE_LEGACY_PATH_RESOLVE bool resolve(const string& path, string& resolved, bool resolve_last_link) { @@ -361,6 +375,7 @@ resolve(const string& path, string& resolved, bool resolve_last_link) { LOG(DEBUG, "external: \"{}\"", resolved); return false; } +#endif string get_sys_cwd() { diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 3c55090c0..f9605768b 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -394,9 +394,6 @@ init_preload() { if(gkfs::env::var_is_set(gkfs::env::PROTECT_FD)) { CTX->protect_fds(true); LOG(INFO, "Protecting user fds"); - } else { - CTX->protect_fds(false); - LOG(INFO, "Not protecting user fds"); } if(CTX->protect_fds()) { @@ -476,12 +473,21 @@ destroy_preload() { extern "C" int gkfs_init() { CTX->init_logging(); - // from here ownwards it is safe to print messages LOG(DEBUG, "Logging subsystem initialized"); + if(gkfs::env::var_is_set(gkfs::env::PROTECT_FD)) { + CTX->protect_fds(true); + LOG(INFO, "Protecting user fds"); + } + if(CTX->protect_fds()) { + CTX->protect_user_fds(); + } gkfs::preload::init_environment(); + if(CTX->protect_fds()) + CTX->unprotect_user_fds(); + return 0; } diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index ef5c659c1..1c2688e79 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -406,9 +406,34 @@ int forward_rename(const string& oldpath, const string& newpath, const gkfs::metadata::Metadata& md) { + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(oldpath, 0)); + if(newpath == "") { // Just cleanup rename status + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = ld_network_service + ->post(endp, oldpath, newpath) + .get() + .at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + // return out.err() ? out.err() : 0; + return 0; + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } + } + + try { LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we @@ -484,11 +509,10 @@ forward_rename(const string& oldpath, const string& newpath, // returning one result and a broadcast(endpoint_set) returning a // result_set. When that happens we can remove the .at(0) :/ // Update new file with target link = oldpath - auto out = - ld_network_service - ->post(endp2, newpath, oldpath) - .get() - .at(0); + auto out = ld_network_service + ->post(endp2, newpath, oldpath) + .get() + .at(0); LOG(DEBUG, "Got response success: {}", out.err()); @@ -508,7 +532,7 @@ forward_rename(const string& oldpath, const string& newpath, // returning one result and a broadcast(endpoint_set) returning a // result_set. When that happens we can remove the .at(0) :/ auto out = ld_network_service - ->post(endp, oldpath, newpath) + ->post(endp, oldpath, newpath) .get() .at(0); diff --git a/src/client/rpc/rpc_types.cpp b/src/client/rpc/rpc_types.cpp index 688e36553..1f34d0d5a 100644 --- a/src/client/rpc/rpc_types.cpp +++ b/src/client/rpc/rpc_types.cpp @@ -62,6 +62,9 @@ hermes::detail::register_user_request_types(uint32_t provider_id) { #ifdef HAS_SYMLINKS (void) registered_requests().add(provider_id); #endif // HAS_SYMLINKS +#ifdef HAS_RENAME + (void) registered_requests().add(provider_id); +#endif // HAS_RENAME (void) registered_requests().add(provider_id); (void) registered_requests().add(provider_id); (void) registered_requests().add(provider_id); diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 4df0a09f2..25b970b58 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -178,6 +178,10 @@ register_server_rpcs(margo_instance_id mid) { #ifdef HAS_SYMLINKS MARGO_REGISTER(mid, gkfs::rpc::tag::mk_symlink, rpc_mk_symlink_in_t, rpc_err_out_t, rpc_srv_mk_symlink); +#endif +#ifdef HAS_RENAME + MARGO_REGISTER(mid, gkfs::rpc::tag::rename, rpc_rename_in_t, rpc_err_out_t, + rpc_srv_rename); #endif MARGO_REGISTER(mid, gkfs::rpc::tag::write, rpc_write_data_in_t, rpc_data_out_t, rpc_srv_write); @@ -584,7 +588,7 @@ agios_initialize() { agios_exit(); - throw; + std::terminate(); } } #endif @@ -885,10 +889,9 @@ parse_input(const cli_options& opts, const CLI::App& desc) { metadir_path.native()); } else { // use rootdir as metadata dir - auto metadir = opts.rootdir; - if(GKFS_DATA->enable_forwarding()) { + auto metadir = opts.rootdir; // As we store normally he metadata to the pfs, we need to put each // daemon in a separate directory. auto metadir_path = diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index fb87688eb..8ba4b3005 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -847,7 +847,66 @@ rpc_srv_get_dirents_extended(hg_handle_t handle) { return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); } -#if defined(HAS_SYMLINKS) || defined(HAS_RENAME) +#if defined(HAS_SYMLINKS) +/** + * @brief Serves a request create a symbolic link + * @internal + * The state of this function is unclear and requires a complete refactor. + * + * All exceptions must be caught here and dealt with accordingly. Any errors are + * placed in the response. + * @endinternal + * @param handle Mercury RPC handle (path is the symbolic link) + * @return Mercury error code to Mercury + */ +hg_return_t +rpc_srv_mk_symlink(hg_handle_t handle) { + rpc_mk_symlink_in_t in{}; + rpc_err_out_t out{}; + + auto ret = margo_get_input(handle, &in); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to retrieve input from handle", __func__); + } + GKFS_DATA->spdlogger()->debug( + "{}() Got RPC with path '{}' and target path '{}'", __func__, + in.path, in.target_path); + // do update + try { + gkfs::metadata::Metadata md = gkfs::metadata::get(in.path); + + md.target_path(in.target_path); + md.mode(S_IFLNK); + md.blocks(0); + + GKFS_DATA->spdlogger()->debug( + "{}() Updating path '{}' with metadata '{}'", __func__, in.path, + md.serialize()); + gkfs::metadata::update(in.path, md); + out.err = 0; + } catch(const std::exception& e) { + // TODO handle NotFoundException + GKFS_DATA->spdlogger()->error("{}() Failed to update entry", __func__); + out.err = 1; + } + + GKFS_DATA->spdlogger()->debug("{}() Sending output err '{}'", __func__, + out.err); + auto hret = margo_respond(handle, &out); + if(hret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error("{}() Failed to respond", __func__); + } + + // Destroy handle when finished + margo_free_input(handle, &in); + margo_destroy(handle); + return HG_SUCCESS; +} + +#endif // HAS_SYMLINKS + +#if defined(HAS_RENAME) /** * @brief Serves a request create a symbolic link and supports rename * @internal @@ -856,11 +915,11 @@ rpc_srv_get_dirents_extended(hg_handle_t handle) { * All exceptions must be caught here and dealt with accordingly. Any errors are * placed in the response. * @endinteral - * @param handle Mercury RPC handle + * @param handle Mercury RPC handle (target_path is the symbolic link) * @return Mercury error code to Mercury */ hg_return_t -rpc_srv_mk_symlink(hg_handle_t handle) { +rpc_srv_rename(hg_handle_t handle) { rpc_mk_symlink_in_t in{}; rpc_err_out_t out{}; @@ -875,17 +934,14 @@ rpc_srv_mk_symlink(hg_handle_t handle) { // do update try { gkfs::metadata::Metadata md = gkfs::metadata::get(in.path); -#ifdef HAS_RENAME - if(md.blocks() == -1) { - // We need to fill the rename path as this is an inverse path - // old -> new - md.rename_path(in.target_path); - } else { -#endif // HAS_RENAME - md.target_path(in.target_path); -#ifdef HAS_RENAME + + + md.target_path(in.target_path); + // We are reverting the rename so we clean up the target_path + if(strcmp(in.target_path, "") == 0) { + md.blocks(0); } -#endif // HAS_RENAME + GKFS_DATA->spdlogger()->debug( "{}() Updating path '{}' with metadata '{}'", __func__, in.path, md.serialize()); @@ -910,7 +966,7 @@ rpc_srv_mk_symlink(hg_handle_t handle) { return HG_SUCCESS; } -#endif // HAS_SYMLINKS || HAS_RENAME +#endif // HAS_RENAME } // namespace @@ -938,3 +994,6 @@ DEFINE_MARGO_RPC_HANDLER(rpc_srv_get_dirents_extended) DEFINE_MARGO_RPC_HANDLER(rpc_srv_mk_symlink) #endif +#ifdef HAS_RENAME +DEFINE_MARGO_RPC_HANDLER(rpc_srv_rename) +#endif \ No newline at end of file diff --git a/src/proxy/util.cpp b/src/proxy/util.cpp index f1542949a..e85bd8e73 100644 --- a/src/proxy/util.cpp +++ b/src/proxy/util.cpp @@ -215,8 +215,8 @@ connect_to_hosts(const vector>& hosts) { ret = margo_addr_lookup(PROXY_DATA->client_rpc_mid(), uri.c_str(), &svr_addr); if(ret != HG_SUCCESS) { - // still not working after 5 tries. - if(i == 4) { + // still not working after 4 tries. + if(i == 3) { auto err_msg = fmt::format("{}() Unable to lookup address '{}'", __func__, uri); diff --git a/test/symlink_test.cpp b/test/symlink_test.cpp index 6f90f1676..592de710f 100644 --- a/test/symlink_test.cpp +++ b/test/symlink_test.cpp @@ -57,7 +57,7 @@ main(int argc, char* argv[]) { const std::string link_ext = dir_ext + "/tmp/link"; char buffIn[] = "oops."; - char* buffOut = new char[strlen(buffIn) + 1]; + char buffOut[strlen(buffIn) + 1]; struct stat st; int ret; @@ -170,7 +170,7 @@ main(int argc, char* argv[]) { // Check readlink - char* target_path = new char[target_int.size() + 1]; + char target_path[target_int.size() + 1]; ret = readlink(link_int.c_str(), target_path, target_int.size() + 1); if(ret <= 0) { std::cerr << "ERROR: Failed to retrieve link path: " << strerror(errno) @@ -309,4 +309,4 @@ main(int argc, char* argv[]) { << std::endl; return -1; } -} +} \ No newline at end of file diff --git a/test/wr_test.cpp b/test/wr_test.cpp index 802e48ca6..b712f33b7 100644 --- a/test/wr_test.cpp +++ b/test/wr_test.cpp @@ -61,7 +61,7 @@ main(int argc, char* argv[]) { string mountdir = "/tmp/mountdir"; string p = mountdir + "/file"; char buffIn[] = "oops."; - char* buffOut = new char[strlen(buffIn) + 1 + 20]; + char buffOut[strlen(buffIn) + 1 + 20]; int fd; int ret; struct stat st; diff --git a/tests/integration/shell/test_concat.py b/tests/integration/shell/test_concat.py index 2a9d21f28..cfac9da5c 100644 --- a/tests/integration/shell/test_concat.py +++ b/tests/integration/shell/test_concat.py @@ -69,5 +69,5 @@ def test_concat(gkfs_daemon, gkfs_shell, file_factory): {gkfs_shell.patched_environ} cat {gkfs_daemon.mountdir / large_file_01} >> {gkfs_daemon.mountdir / large_file_02} """, intercept_shell=False) - # cmd = gkfs_client.bash('cat', gkfs_daemon.mountdir / large_file_01, '>>', gkfs_daemon.mountdir / large_file_02) + #cmd = gkfs_client.bash('cat', gkfs_daemon.mountdir / large_file_01, '>>', gkfs_daemon.mountdir / large_file_02) assert cmd.exit_code == 0 -- GitLab