From 7e7e789e52435a36302428fb8e77fb5efe653a72 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 6 May 2024 13:42:43 +0200 Subject: [PATCH 1/4] Remove/remove_dir optimization: Avoid 1 RPC per call --- include/client/rpc/forward_metadata.hpp | 2 +- include/client/rpc/rpc_types.hpp | 14 ++++++++--- include/common/rpc/rpc_types.hpp | 3 ++- src/client/gkfs_functions.cpp | 21 ++++++++-------- src/client/rpc/forward_metadata.cpp | 9 ++++--- src/daemon/handler/srv_metadata.cpp | 33 +++++++++++++++---------- 6 files changed, 49 insertions(+), 33 deletions(-) diff --git a/include/client/rpc/forward_metadata.hpp b/include/client/rpc/forward_metadata.hpp index ac3cd4512..88ed11188 100644 --- a/include/client/rpc/forward_metadata.hpp +++ b/include/client/rpc/forward_metadata.hpp @@ -62,7 +62,7 @@ forward_rename(const std::string& oldpath, const std::string& newpath, #endif // HAS_RENAME int -forward_remove(const std::string& path, const int8_t num_copies); +forward_remove(const std::string& path, bool rm_dir, const int8_t num_copies); int forward_decr_size(const std::string& path, size_t length, const int copy); diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index a259e46d1..4577f58ed 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -520,7 +520,8 @@ struct remove_metadata { hermes::detail::post_to_mercury(ExecutionContext*); public: - input(const std::string& path) : m_path(path) {} + input(const std::string& path, bool rm_dir) + : m_path(path), m_rm_dir(rm_dir) {} input(input&& rhs) = default; @@ -537,14 +538,21 @@ struct remove_metadata { return m_path; } - explicit input(const rpc_rm_node_in_t& other) : m_path(other.path) {} + bool + rm_dir() const { + return m_rm_dir; + } + + explicit input(const rpc_rm_node_in_t& other) + : m_path(other.path), m_rm_dir(other.rm_dir) {} explicit operator rpc_rm_node_in_t() { - return {m_path.c_str()}; + return {m_path.c_str(), m_rm_dir}; } private: std::string m_path; + bool m_rm_dir; }; class output { diff --git a/include/common/rpc/rpc_types.hpp b/include/common/rpc/rpc_types.hpp index e247d14ac..40fc20005 100644 --- a/include/common/rpc/rpc_types.hpp +++ b/include/common/rpc/rpc_types.hpp @@ -48,7 +48,8 @@ MERCURY_GEN_PROC(rpc_path_only_in_t, ((hg_const_string_t) (path))) MERCURY_GEN_PROC(rpc_stat_out_t, ((hg_int32_t) (err))((hg_const_string_t) (db_val))) -MERCURY_GEN_PROC(rpc_rm_node_in_t, ((hg_const_string_t) (path))) +MERCURY_GEN_PROC(rpc_rm_node_in_t, + ((hg_const_string_t) (path))((hg_bool_t) (rm_dir))) MERCURY_GEN_PROC( rpc_rm_metadata_out_t, diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 90a1ca0ae..82f47b8c9 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -339,6 +339,8 @@ gkfs_create(const std::string& path, mode_t mode) { */ int gkfs_remove(const std::string& path) { +#ifdef HAS_SYMLINKS +#ifdef HAS_RENAME auto md = gkfs::utils::get_metadata(path); if(!md) { return -1; @@ -349,8 +351,6 @@ gkfs_remove(const std::string& path) { errno = EISDIR; return -1; } -#ifdef HAS_SYMLINKS -#ifdef HAS_RENAME if(md.value().blocks() == -1) { errno = ENOENT; return -1; @@ -378,7 +378,7 @@ gkfs_remove(const std::string& path) { if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { err = gkfs::rpc::forward_remove_proxy(path); } else { - err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); + err = gkfs::rpc::forward_remove(path, false, CTX->get_replicas()); } if(err) { errno = err; @@ -1340,19 +1340,17 @@ gkfs_opendir(const std::string& path) { */ int gkfs_rmdir(const std::string& path) { + int err; + // check that directory is empty if a strict dir hierarchy is enforced + // TODO rename #define +#if GKFS_CREATE_CHECK_PARENTS auto md = gkfs::utils::get_metadata(path); if(!md) { LOG(DEBUG, "Error: Path '{}' err code '{}' ", path, strerror(errno)); return -1; } - if(!S_ISDIR(md->mode())) { - LOG(DEBUG, "Path '{}' is not a directory", path); - errno = ENOTDIR; - return -1; - } - auto ret = gkfs::rpc::forward_get_dirents(path); - auto err = ret.first; + err = ret.first; if(err) { errno = err; return -1; @@ -1363,10 +1361,11 @@ gkfs_rmdir(const std::string& path) { errno = ENOTEMPTY; return -1; } +#endif if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { err = gkfs::rpc::forward_remove_proxy(path); } else { - err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); + err = gkfs::rpc::forward_remove(path, true, CTX->get_replicas()); } if(err) { errno = err; diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index e569d873f..c5c309573 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -135,7 +135,7 @@ forward_stat(const std::string& path, string& attr, const int copy) { * @return error code */ int -forward_remove(const std::string& path, const int8_t num_copies) { +forward_remove(const std::string& path, bool rm_dir, const int8_t num_copies) { if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { LOG(WARNING, "{} was called even though proxy should be used!", __func__); @@ -159,7 +159,8 @@ forward_remove(const std::string& path, const int8_t num_copies) { // returning one result and a broadcast(endpoint_set) returning a // result_set. When that happens we can remove the .at(0) :/ auto out = ld_network_service - ->post(endp, path) + ->post(endp, path, + rm_dir) .get() .at(0); @@ -174,9 +175,9 @@ forward_remove(const std::string& path, const int8_t num_copies) { return EBUSY; } } - // if file is not a regular file and it's size is 0, data does not need to + // if file is not a regular file or it's size is 0, data does not need to // be removed, thus, we exit - if(!(S_ISREG(mode) && (size != 0))) + if(!S_ISREG(mode) || size == 0) return 0; diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index 7351b2fb9..d34247403 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -241,23 +241,30 @@ rpc_srv_remove_metadata(hg_handle_t handle) { GKFS_DATA->spdlogger()->debug("{}() Got remove metadata RPC with path '{}'", __func__, in.path); - // Remove metadentry if exists on the node try { auto md = gkfs::metadata::get(in.path); - gkfs::metadata::remove(in.path); - out.err = 0; - out.mode = md.mode(); - out.size = md.size(); - if constexpr(gkfs::config::metadata::implicit_data_removal) { - if(S_ISREG(md.mode()) && (md.size() != 0)) - GKFS_DATA->storage()->destroy_chunk_space(in.path); + if(S_ISDIR(md.mode()) && !in.rm_dir) { + // return is directory errorcode if request was not rmdir + out.err = EISDIR; + } else if(!S_ISDIR(md.mode()) && in.rm_dir) { + // return is not directory errorcode if request was rmdir + out.err = ENOTDIR; + } else { + // remove metadata (and implicitly data if enabled + gkfs::metadata::remove(in.path); + out.err = 0; + out.mode = md.mode(); + out.size = S_ISDIR(md.mode()) ? 0 : md.size(); + // if file, remove metadata and also return mode and size + if constexpr(gkfs::config::metadata::implicit_data_removal) { + if(S_ISREG(md.mode()) && (md.size() != 0)) + GKFS_DATA->storage()->destroy_chunk_space(in.path); + } } - } catch(const gkfs::metadata::NotFoundException& e) { - GKFS_DATA->spdlogger()->warn( - "{}(): path '{}' message '{}'. Continuing, setting out.err 0.", - __func__, in.path, e.what()); - out.err = 0; + // This exception is only thrown from get() if the entry does not exist + // remove() does not throw this exception + out.err = ENOENT; } catch(const gkfs::metadata::DBException& e) { GKFS_DATA->spdlogger()->error("{}(): path '{}' message '{}'", __func__, in.path, e.what()); -- GitLab From 9349e3663941e132281c3df111b73cc0eadfd45f Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 28 Jun 2024 10:19:54 +0200 Subject: [PATCH 2/4] Proxy: Remove optimization added --- include/client/rpc/forward_metadata_proxy.hpp | 2 +- include/client/rpc/rpc_types.hpp | 14 +++++++++++--- include/proxy/rpc/forward_metadata.hpp | 2 +- src/client/gkfs_functions.cpp | 13 +++++++++---- src/client/rpc/forward_metadata_proxy.cpp | 5 +++-- src/proxy/rpc/forward_metadata.cpp | 18 ++++++++++-------- src/proxy/rpc/srv_metadata.cpp | 3 ++- 7 files changed, 37 insertions(+), 20 deletions(-) diff --git a/include/client/rpc/forward_metadata_proxy.hpp b/include/client/rpc/forward_metadata_proxy.hpp index 2e87106fa..8e5ff1def 100644 --- a/include/client/rpc/forward_metadata_proxy.hpp +++ b/include/client/rpc/forward_metadata_proxy.hpp @@ -23,7 +23,7 @@ int forward_stat_proxy(const std::string& path, std::string& attr); int -forward_remove_proxy(const std::string& path); +forward_remove_proxy(const std::string& path, bool rm_dir); int forward_decr_size_proxy(const std::string& path, size_t length); diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index 4577f58ed..485619e60 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -3121,7 +3121,8 @@ struct remove_proxy { hermes::detail::post_to_mercury(ExecutionContext*); public: - input(const std::string& path) : m_path(path) {} + input(const std::string& path, bool rm_dir) + : m_path(path), m_rm_dir(rm_dir) {} input(input&& rhs) = default; @@ -3138,14 +3139,21 @@ struct remove_proxy { return m_path; } - explicit input(const rpc_rm_node_in_t& other) : m_path(other.path) {} + bool + rm_dir() const { + return m_rm_dir; + } + + explicit input(const rpc_rm_node_in_t& other) + : m_path(other.path), m_rm_dir(other.rm_dir) {} explicit operator rpc_rm_node_in_t() { - return {m_path.c_str()}; + return {m_path.c_str(), m_rm_dir}; } private: std::string m_path; + bool m_rm_dir; }; class output { diff --git a/include/proxy/rpc/forward_metadata.hpp b/include/proxy/rpc/forward_metadata.hpp index c5ff43098..a16e5f5cf 100644 --- a/include/proxy/rpc/forward_metadata.hpp +++ b/include/proxy/rpc/forward_metadata.hpp @@ -25,7 +25,7 @@ std::pair forward_stat(const std::string& path); int -forward_remove(const std::string& path); +forward_remove(const std::string& path, bool rm_dir); int forward_decr_size(const std::string& path, size_t length); diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 82f47b8c9..ec04145f1 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -365,7 +365,8 @@ gkfs_remove(const std::string& path) { return -1; } } - auto err = gkfs::rpc::forward_remove(new_path, CTX->get_replicas()); + auto err = gkfs::rpc::forward_remove(new_path, false, + CTX->get_replicas()); if(err) { errno = err; return -1; @@ -376,7 +377,7 @@ gkfs_remove(const std::string& path) { #endif // HAS_SYMLINKS int err = 0; if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { - err = gkfs::rpc::forward_remove_proxy(path); + err = gkfs::rpc::forward_remove_proxy(path, false); } else { err = gkfs::rpc::forward_remove(path, false, CTX->get_replicas()); } @@ -478,7 +479,11 @@ gkfs_rename(const string& old_path, const string& new_path) { return -1; } // Delete old file - err = gkfs::rpc::forward_remove(old_path, CTX->get_replicas()); + auto is_dir = false; + if(S_ISDIR(md_old->mode())) + is_dir = true; + err = gkfs::rpc::forward_remove(old_path, is_dir, + CTX->get_replicas()); if(err) { errno = err; return -1; @@ -1363,7 +1368,7 @@ gkfs_rmdir(const std::string& path) { } #endif if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { - err = gkfs::rpc::forward_remove_proxy(path); + err = gkfs::rpc::forward_remove_proxy(path, true); } else { err = gkfs::rpc::forward_remove(path, true, CTX->get_replicas()); } diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index 0e43b352d..b2892ec69 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -78,7 +78,7 @@ forward_stat_proxy(const std::string& path, string& attr) { } int -forward_remove_proxy(const std::string& path) { +forward_remove_proxy(const std::string& path, bool rm_dir) { auto endp = CTX->proxy_host(); try { @@ -88,7 +88,8 @@ forward_remove_proxy(const std::string& path) { // TODO(amiranda): hermes will eventually provide a post(endpoint) // returning one result and a broadcast(endpoint_set) returning a // result_set. When that happens we can remove the .at(0) :/ - auto out = ld_proxy_service->post(endp, path) + auto out = ld_proxy_service + ->post(endp, path, rm_dir) .get() .at(0); LOG(DEBUG, "Got response success: {}", out.err()); diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp index 830b7db51..36f83fb36 100644 --- a/src/proxy/rpc/forward_metadata.cpp +++ b/src/proxy/rpc/forward_metadata.cpp @@ -22,7 +22,7 @@ using namespace std; namespace { std::tuple -remove_metadata(const std::string& path) { +remove_metadata(const std::string& path, bool rm_dir) { hg_handle_t rpc_handle = nullptr; rpc_rm_node_in_t daemon_in{}; rpc_rm_metadata_out_t daemon_out{}; @@ -31,6 +31,7 @@ remove_metadata(const std::string& path) { uint32_t mode = 0; // fill in daemon_in.path = path.c_str(); + daemon_in.rm_dir = rm_dir; // Create handle PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); auto endp = PROXY_DATA->rpc_endpoints().at( @@ -51,9 +52,11 @@ remove_metadata(const std::string& path) { if(ret == HG_SUCCESS) { PROXY_DATA->log()->debug("{}() Got response success: {}", __func__, daemon_out.err); - mode = daemon_out.mode; - size = daemon_out.size; err = daemon_out.err; + if(!err) { + mode = daemon_out.mode; + size = daemon_out.size; + } margo_free_output(rpc_handle, &daemon_out); } else { // something is wrong @@ -233,16 +236,15 @@ forward_stat(const std::string& path) { } int -forward_remove(const std::string& path) { - auto [err, mode, size] = remove_metadata(path); +forward_remove(const std::string& path, bool rm_dir) { + auto [err, mode, size] = remove_metadata(path, rm_dir); if(err != 0) { return err; } - // if file is not a regular file and it's size is 0, data does not need to + // if file is not a regular file or it's size is 0, data does not need to // be removed, thus, we exit - if(!(S_ISREG(mode) && (size != 0))) { + if(!S_ISREG(mode) || size == 0) return 0; - } return remove_data(path); } diff --git a/src/proxy/rpc/srv_metadata.cpp b/src/proxy/rpc/srv_metadata.cpp index df07724d1..4b973c03a 100644 --- a/src/proxy/rpc/srv_metadata.cpp +++ b/src/proxy/rpc/srv_metadata.cpp @@ -79,7 +79,8 @@ proxy_rpc_srv_remove(hg_handle_t handle) { } PROXY_DATA->log()->debug("{}() Got RPC with path '{}'", __func__, client_in.path); - client_out.err = gkfs::rpc::forward_remove(client_in.path); + client_out.err = + gkfs::rpc::forward_remove(client_in.path, client_in.rm_dir); PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, client_out.err); -- GitLab From 86ec77a21cfb94c67e76455a2e3209c508f829cc Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 28 Jun 2024 09:26:50 +0200 Subject: [PATCH 3/4] Added Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fb21bebc..aa8775d9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### New +- Remove optimization, removing one RPC per operation ([!195](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_request/195)). - Added the GekkoFS proxy as an optional gateway between client and daemon. The proxy is started on each compute node that houses clients ([!191](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_request/191)). - Additional options for the GekkoFS daemon were added to integrate the GekkoFS proxy. -- GitLab From b0ba57897a7710235f20d27ed12b11ffb5e65731 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 28 Jun 2024 10:00:03 +0200 Subject: [PATCH 4/4] Fix GCC warning bug for <= gcc-12 via #pragma --- include/common/common_defs.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/common/common_defs.hpp b/include/common/common_defs.hpp index f9768d809..206ab7de1 100644 --- a/include/common/common_defs.hpp +++ b/include/common/common_defs.hpp @@ -90,9 +90,17 @@ constexpr auto ucx_all = "ucx+all"; constexpr auto ucx_tcp = "ucx+tcp"; constexpr auto ucx_rc = "ucx+rc"; constexpr auto ucx_ud = "ucx+ud"; +/* + * Disable warning for unused variable + * This is a bug in gcc that was fixed in version 13. + * XXX Can be removed in the future + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" constexpr auto all_remote_protocols = {ofi_sockets, ofi_tcp, ofi_verbs, ofi_psm2, ucx_all, ucx_tcp, ucx_rc, ucx_ud}; +#pragma GCC diagnostic pop } // namespace protocol } // namespace gkfs::rpc -- GitLab