From 9c6d9a79276a09a3c5a4aab6f0fb407516741a13 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 14 Jun 2024 10:25:29 +0200 Subject: [PATCH 01/24] Added Proxy --- .gitmodules | 2 +- include/CMakeLists.txt | 3 + include/client/env.hpp | 2 + include/client/hooks.hpp | 3 + include/client/preload_context.hpp | 26 + include/client/preload_util.hpp | 8 + include/client/rpc/forward_data.hpp | 8 +- include/client/rpc/forward_data_proxy.hpp | 34 + include/client/rpc/forward_metadata_proxy.hpp | 38 + include/client/rpc/rpc_types.hpp | 1033 +++++++++++++++++ include/common/CMakeLists.txt | 6 + include/common/common_defs.hpp | 20 + include/common/rpc/rpc_types.hpp | 36 + include/config.hpp | 34 + include/daemon/classes/fs_data.hpp | 14 + include/daemon/classes/rpc_data.hpp | 16 +- include/daemon/handler/rpc_defs.hpp | 14 +- include/daemon/ops/data.hpp | 3 +- include/proxy/CMakeLists.txt | 41 + include/proxy/env.hpp | 25 + include/proxy/proxy.hpp | 36 + include/proxy/proxy_data.hpp | 146 +++ include/proxy/rpc/forward_data.hpp | 36 + include/proxy/rpc/forward_metadata.hpp | 41 + include/proxy/rpc/rpc_defs.hpp | 40 + include/proxy/rpc/rpc_util.hpp | 107 ++ include/proxy/util.hpp | 44 + marc/CMakeLists.txt | 24 + marc/main.cpp | 45 + scripts/run/gkfs | 181 ++- scripts/run/gkfs_io500_proxy.conf | 44 + src/CMakeLists.txt | 2 + src/client/CMakeLists.txt | 4 + src/client/gkfs_functions.cpp | 113 +- src/client/hooks.cpp | 14 + src/client/intercept.cpp | 7 + src/client/preload.cpp | 29 + src/client/preload_context.cpp | 35 + src/client/preload_util.cpp | 113 +- src/client/rpc/forward_data.cpp | 24 +- src/client/rpc/forward_data_proxy.cpp | 197 ++++ src/client/rpc/forward_metadata.cpp | 39 +- src/client/rpc/forward_metadata_proxy.cpp | 249 ++++ src/client/rpc/rpc_types.cpp | 55 +- src/daemon/backend/data/chunk_storage.cpp | 20 +- src/daemon/classes/fs_data.cpp | 19 + src/daemon/classes/rpc_data.cpp | 28 +- src/daemon/daemon.cpp | 197 +++- src/daemon/handler/srv_data.cpp | 574 +++++++++ 
src/daemon/handler/srv_metadata.cpp | 8 +- src/daemon/ops/data.cpp | 176 ++- src/daemon/util.cpp | 9 +- src/proxy/CMakeLists.txt | 79 ++ src/proxy/env.cpp | 14 + src/proxy/proxy.cpp | 388 +++++++ src/proxy/proxy_data.cpp | 127 ++ src/proxy/rpc/forward_data.cpp | 429 +++++++ src/proxy/rpc/forward_metadata.cpp | 373 ++++++ src/proxy/rpc/srv_data.cpp | 207 ++++ src/proxy/rpc/srv_metadata.cpp | 207 ++++ src/proxy/util.cpp | 246 ++++ 61 files changed, 5895 insertions(+), 197 deletions(-) create mode 100644 include/client/rpc/forward_data_proxy.hpp create mode 100644 include/client/rpc/forward_metadata_proxy.hpp create mode 100644 include/proxy/CMakeLists.txt create mode 100644 include/proxy/env.hpp create mode 100644 include/proxy/proxy.hpp create mode 100644 include/proxy/proxy_data.hpp create mode 100644 include/proxy/rpc/forward_data.hpp create mode 100644 include/proxy/rpc/forward_metadata.hpp create mode 100644 include/proxy/rpc/rpc_defs.hpp create mode 100644 include/proxy/rpc/rpc_util.hpp create mode 100644 include/proxy/util.hpp create mode 100644 marc/CMakeLists.txt create mode 100644 marc/main.cpp create mode 100644 scripts/run/gkfs_io500_proxy.conf create mode 100644 src/client/rpc/forward_data_proxy.cpp create mode 100644 src/client/rpc/forward_metadata_proxy.cpp create mode 100644 src/proxy/CMakeLists.txt create mode 100644 src/proxy/env.cpp create mode 100644 src/proxy/proxy.cpp create mode 100644 src/proxy/proxy_data.cpp create mode 100644 src/proxy/rpc/forward_data.cpp create mode 100644 src/proxy/rpc/forward_metadata.cpp create mode 100644 src/proxy/rpc/srv_data.cpp create mode 100644 src/proxy/rpc/srv_metadata.cpp create mode 100644 src/proxy/util.cpp diff --git a/.gitmodules b/.gitmodules index 276395241..941213b3d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "external/hermes"] path = external/hermes - url = https://github.com/gekkofs/hermes.git + url = https://github.com/marcvef/hermes.git [submodule "tests/scripts/bats"] 
path = tests/scripts/bats url = https://github.com/bats-core/bats-core.git diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 1ae9e3b40..f7a07e2c6 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -32,5 +32,8 @@ add_subdirectory(common) add_subdirectory(daemon) # Client library add_subdirectory(client) +# Proxy +add_subdirectory(proxy) target_sources(gkfs_daemon PUBLIC config.hpp version.hpp.in) +target_sources(gkfs_proxy PUBLIC config.hpp version.hpp.in) diff --git a/include/client/env.hpp b/include/client/env.hpp index 4f24d9ab1..5574df247 100644 --- a/include/client/env.hpp +++ b/include/client/env.hpp @@ -59,6 +59,8 @@ static constexpr auto METRICS_IP_PORT = ADD_PREFIX("METRICS_IP_PORT"); #endif static constexpr auto NUM_REPL = ADD_PREFIX("NUM_REPL"); +static constexpr auto PROXY_PID_FILE = ADD_PREFIX("PROXY_PID_FILE"); + } // namespace gkfs::env #undef ADD_PREFIX diff --git a/include/client/hooks.hpp b/include/client/hooks.hpp index a287f6d3c..4f21d0563 100644 --- a/include/client/hooks.hpp +++ b/include/client/hooks.hpp @@ -205,6 +205,9 @@ hook_fsync(unsigned int fd); int hook_getxattr(const char* path, const char* name, void* value, size_t size); +int +hook_lgetxattr(const char* path, const char* name, void* value, size_t size); + int hook_fallocate(int fd, int mode, off_t offset, off_t len); diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index f73288032..95d2f8093 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -101,6 +101,11 @@ private: std::string rpc_protocol_; bool auto_sm_{false}; + // proxy stuff + bool use_proxy_{false}; + std::string proxy_address_str_; + hermes::endpoint proxy_host_; + bool interception_enabled_; std::bitset internal_fds_; @@ -191,6 +196,27 @@ public: relativize_path(const char* raw_path, std::string& relative_path, bool resolve_last_link = true) const; + bool + use_proxy() const; + + void + use_proxy(bool use_proxy); 
+ + const std::string& + proxy_address_str() const; + + void + proxy_address_str(const std::string& proxy_address_str); + + const hermes::endpoint& + proxy_host() const; + + void + proxy_host(const hermes::endpoint& proxy_host); + + void + clear_proxy_host(); + const std::shared_ptr& file_map() const; diff --git a/include/client/preload_util.hpp b/include/client/preload_util.hpp index ee608b485..432a9b6ce 100644 --- a/include/client/preload_util.hpp +++ b/include/client/preload_util.hpp @@ -61,6 +61,7 @@ class async_engine; } extern std::unique_ptr ld_network_service; +extern std::unique_ptr ld_proxy_service; // function definitions namespace gkfs::utils { @@ -89,6 +90,13 @@ read_hosts_file(); void connect_to_hosts(const std::vector>& hosts); +void +check_for_proxy(); + +void +lookup_proxy_addr(); + } // namespace gkfs::utils + #endif // GEKKOFS_PRELOAD_UTIL_HPP diff --git a/include/client/rpc/forward_data.hpp b/include/client/rpc/forward_data.hpp index cf349a4aa..a044a1baf 100644 --- a/include/client/rpc/forward_data.hpp +++ b/include/client/rpc/forward_data.hpp @@ -30,17 +30,13 @@ #ifndef GEKKOFS_CLIENT_FORWARD_DATA_HPP #define GEKKOFS_CLIENT_FORWARD_DATA_HPP +#include + #include #include #include namespace gkfs::rpc { -struct ChunkStat { - unsigned long chunk_size; - unsigned long chunk_total; - unsigned long chunk_free; -}; - // TODO once we have LEAF, remove all the error code returns and throw them as // an exception. diff --git a/include/client/rpc/forward_data_proxy.hpp b/include/client/rpc/forward_data_proxy.hpp new file mode 100644 index 000000000..52659eab8 --- /dev/null +++ b/include/client/rpc/forward_data_proxy.hpp @@ -0,0 +1,34 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
+ + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_FORWARD_DATA_PROXY_HPP +#define GEKKOFS_FORWARD_DATA_PROXY_HPP + +#include + +namespace gkfs::rpc { + +std::pair +forward_write_proxy(const std::string& path, const void* buf, off64_t offset, + size_t write_size); + +std::pair +forward_read_proxy(const std::string& path, void* buf, off64_t offset, + size_t read_size); + +std::pair +forward_get_chunk_stat_proxy(); + +} // namespace gkfs::rpc + +#endif // GEKKOFS_FORWARD_DATA_PROXY_HPP diff --git a/include/client/rpc/forward_metadata_proxy.hpp b/include/client/rpc/forward_metadata_proxy.hpp new file mode 100644 index 000000000..57c34afb3 --- /dev/null +++ b/include/client/rpc/forward_metadata_proxy.hpp @@ -0,0 +1,38 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_FORWARD_METADATA_PROXY_HPP +#define GEKKOFS_FORWARD_METADATA_PROXY_HPP + +namespace gkfs::rpc { + +int +forward_create_proxy(const std::string& path, const mode_t mode); + +int +forward_stat_proxy(const std::string& path, std::string& attr); + +int +forward_remove_proxy(const std::string& path); + +std::pair +forward_update_metadentry_size_proxy(const std::string& path, const size_t size, + const off64_t offset, + const bool append_flag); + +std::pair>> +forward_get_dirents_single_proxy(const std::string& path, int server); + +} // namespace gkfs::rpc + +#endif // GEKKOFS_FORWARD_METADATA_PROXY_HPP diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index 4c756dec2..4f8567550 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -2294,6 +2294,1039 @@ struct chunk_stat { }; }; +//============================================================================== +// definitions for write_data +struct write_data_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = write_data_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_client_proxy_write_in_t; + using mercury_output_type = rpc_data_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 20; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_write; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_client_proxy_write_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_data_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, int64_t offset, uint64_t write_size, + const hermes::exposed_memory& buffers) + : m_path(path), m_offset(offset), m_write_size(write_size), + m_buffers(buffers) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int64_t + offset() const { + return m_offset; + } + + uint64_t + write_size() const { + return m_write_size; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit input(const rpc_client_proxy_write_in_t& other) + : m_path(other.path), m_offset(other.offset), + m_write_size(other.write_size), m_buffers(other.bulk_handle) {} + + explicit + operator rpc_client_proxy_write_in_t() { + return {m_path.c_str(), m_offset, m_write_size, + hg_bulk_t(m_buffers)}; + } + + private: + std::string m_path; + int64_t m_offset; + uint64_t m_write_size; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_io_size() {} + + output(int32_t err, size_t io_size) : m_err(err), m_io_size(io_size) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_data_out_t& out) { + m_err = out.err; 
+ m_io_size = out.io_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + io_size() const { + return m_io_size; + } + + private: + int32_t m_err; + size_t m_io_size; + }; +}; + +//============================================================================== +// definitions for write_data +struct read_data_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = read_data_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_client_proxy_read_in_t; + using mercury_output_type = rpc_data_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 21; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_read; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_client_proxy_read_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_data_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, int64_t offset, uint64_t read_size, + const hermes::exposed_memory& buffers) + : m_path(path), m_offset(offset), m_read_size(read_size), + m_buffers(buffers) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int64_t + offset() const { + return m_offset; + } + + uint64_t + read_size() const { + return m_read_size; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit input(const rpc_client_proxy_read_in_t& other) + : m_path(other.path), m_offset(other.offset), + m_read_size(other.read_size), m_buffers(other.bulk_handle) {} + + explicit + operator rpc_client_proxy_read_in_t() { + return {m_path.c_str(), m_offset, m_read_size, + hg_bulk_t(m_buffers)}; + } + + private: + std::string m_path; + int64_t m_offset; + uint64_t m_read_size; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_io_size() {} + + output(int32_t err, size_t io_size) : m_err(err), m_io_size(io_size) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_data_out_t& out) { + m_err = out.err; + m_io_size 
= out.io_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + io_size() const { + return m_io_size; + } + + private: + int32_t m_err; + size_t m_io_size; + }; +}; + +//============================================================================== +// definitions for chunk_stat_proxy +struct chunk_stat_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = chunk_stat_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_chunk_stat_in_t; + using mercury_output_type = rpc_chunk_stat_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 22; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_chunk_stat; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_chunk_stat_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_chunk_stat_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(int32_t dummy) : m_dummy(dummy) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + int32_t + dummy() const { + return m_dummy; + } + + explicit input(const rpc_chunk_stat_in_t& other) + : m_dummy(other.dummy) {} + + explicit + operator rpc_chunk_stat_in_t() { + return {m_dummy}; + } + + private: + int32_t m_dummy; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_chunk_size(), m_chunk_total(), m_chunk_free() {} + + output(int32_t err, uint64_t chunk_size, uint64_t chunk_total, + uint64_t chunk_free) + : m_err(err), m_chunk_size(chunk_size), m_chunk_total(chunk_total), + m_chunk_free(chunk_free) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_chunk_stat_out_t& out) { + m_err = out.err; + m_chunk_size = out.chunk_size; + m_chunk_total = out.chunk_total; + m_chunk_free = out.chunk_free; + } + + int32_t + err() const { + return m_err; + } + + uint64_t + chunk_size() const { + return m_chunk_size; + } + + uint64_t + chunk_total() const { + return m_chunk_total; + } + + uint64_t + chunk_free() const { + return m_chunk_free; + } + + private: + int32_t m_err; + uint64_t m_chunk_size; + uint64_t m_chunk_total; + 
uint64_t m_chunk_free; + }; +}; + +//============================================================================== +// definitions for create +struct create_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = create_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_mk_node_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 23; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_create; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_mk_node_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, uint32_t mode) + : m_path(path), m_mode(mode) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint32_t + mode() const { + return m_mode; + } + + explicit input(const rpc_mk_node_in_t& other) + : m_path(other.path), m_mode(other.mode) {} + + explicit + operator rpc_mk_node_in_t() { + return {m_path.c_str(), m_mode}; + } + + private: + std::string m_path; + uint32_t m_mode; 
+ }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err() {} + + output(int32_t err) : m_err(err) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +//============================================================================== +// definitions for stat +struct stat_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = stat_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_path_only_in_t; + using mercury_output_type = rpc_stat_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 24; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_stat; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_path_only_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_stat_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path) : m_path(path) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit input(const rpc_path_only_in_t& other) : m_path(other.path) {} + + explicit + operator rpc_path_only_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_db_val() {} + + output(int32_t err, const std::string& db_val) + : m_err(err), m_db_val(db_val) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_stat_out_t& out) { + m_err = out.err; + + if(out.db_val != nullptr) { + m_db_val = out.db_val; + } + } + + int32_t + err() const { + return m_err; + } + + std::string + db_val() const { + return m_db_val; + } + + private: + int32_t m_err; + std::string m_db_val; + }; +}; + +//============================================================================== +// definitions for remove +struct remove_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = 
remove_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_rm_node_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 25; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_remove; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_rm_node_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path) : m_path(path) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit input(const rpc_rm_node_in_t& other) : m_path(other.path) {} + + explicit + operator rpc_rm_node_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err() {} + + output(int32_t err) : m_err(err) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; 
+ } + + private: + int32_t m_err; + }; +}; + +//============================================================================== +// definitions for update_metadentry_size +struct update_metadentry_size_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = update_metadentry_size_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_update_metadentry_size_in_t; + using mercury_output_type = rpc_update_metadentry_size_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 26; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_update_size; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_size_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_size_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, uint64_t size, int64_t offset, + bool append) + : m_path(path), m_size(size), m_offset(offset), m_append(append) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + size() const { + return m_size; + } + + int64_t + offset() const { + return m_offset; + } + + bool + append() const { + return m_append; + } + + explicit input(const rpc_update_metadentry_size_in_t& other) + : m_path(other.path), m_size(other.size), m_offset(other.offset), + m_append(other.append) {} + + explicit + operator rpc_update_metadentry_size_in_t() { + return {m_path.c_str(), m_size, m_offset, m_append}; + } + + private: + std::string m_path; + uint64_t m_size; + int64_t m_offset; + bool m_append; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_ret_size() {} + + output(int32_t err, int64_t ret_size) + : m_err(err), m_ret_size(ret_size) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_update_metadentry_size_out_t& out) { + m_err = out.err; + m_ret_size = out.ret_offset; + } + + int32_t + err() const { + return m_err; + } + + 
int64_t + ret_size() const { + return m_ret_size; + } + + private: + int32_t m_err; + int64_t m_ret_size; + }; +}; + +//============================================================================== +// definitions for get_dirents_extended +struct get_dirents_extended_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = get_dirents_extended_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_proxy_get_dirents_in_t; + using mercury_output_type = rpc_get_dirents_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 27; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = + gkfs::rpc::tag::client_proxy_get_dirents_extended; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_proxy_get_dirents_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_get_dirents_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, int32_t server, + const hermes::exposed_memory& buffers) + : m_path(path), m_server(server), m_buffers(buffers) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int32_t + server() const { + return m_server; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit input(const rpc_proxy_get_dirents_in_t& other) + : m_path(other.path), m_server(other.server), + m_buffers(other.bulk_handle) {} + + explicit + operator rpc_proxy_get_dirents_in_t() { + return {m_path.c_str(), m_server, hg_bulk_t(m_buffers)}; + } + + private: + std::string m_path; + int32_t m_server; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_dirents_size() {} + + output(int32_t err, size_t dirents_size) + : m_err(err), m_dirents_size(dirents_size) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_get_dirents_out_t& out) { + m_err = out.err; + m_dirents_size = out.dirents_size; + } + + int32_t + err() const { + return m_err; + } + + size_t + dirents_size() const { + return m_dirents_size; + 
} + + private: + int32_t m_err; + size_t m_dirents_size; + }; +}; + } // namespace gkfs::rpc diff --git a/include/common/CMakeLists.txt b/include/common/CMakeLists.txt index 38a23bc30..73861334e 100644 --- a/include/common/CMakeLists.txt +++ b/include/common/CMakeLists.txt @@ -30,3 +30,9 @@ target_sources( gkfs_daemon PUBLIC cmake_configure.hpp.in common_defs.hpp rpc/rpc_types.hpp rpc/rpc_util.hpp ) + +target_sources(gkfs_proxy + PUBLIC + common_defs.hpp + rpc/rpc_types.hpp + rpc/rpc_util.hpp) \ No newline at end of file diff --git a/include/common/common_defs.hpp b/include/common/common_defs.hpp index 21c09a743..8f1630092 100644 --- a/include/common/common_defs.hpp +++ b/include/common/common_defs.hpp @@ -34,6 +34,12 @@ namespace gkfs::rpc { using chnk_id_t = unsigned long; +struct ChunkStat { + unsigned long chunk_size; + unsigned long chunk_total; + unsigned long chunk_free; +}; + namespace tag { @@ -55,6 +61,20 @@ constexpr auto write = "rpc_srv_write_data"; constexpr auto read = "rpc_srv_read_data"; constexpr auto truncate = "rpc_srv_trunc_data"; constexpr auto get_chunk_stat = "rpc_srv_chunk_stat"; +// IPC communication between client and proxy +constexpr auto client_proxy_create = "proxy_rpc_srv_create"; +constexpr auto client_proxy_stat = "proxy_rpc_srv_stat"; +constexpr auto client_proxy_remove = "proxy_rpc_srv_remove"; +constexpr auto client_proxy_update_size = + "proxy_rpc_srv_update_metadentry_size"; +constexpr auto client_proxy_write = "proxy_rpc_srv_write_data"; +constexpr auto client_proxy_read = "proxy_rpc_srv_read_data"; +constexpr auto client_proxy_chunk_stat = "proxy_rpc_srv_chunk_stat"; +constexpr auto client_proxy_get_dirents_extended = + "proxy_rpc_srv_get_dirents_extended"; +// Specific RPCs between daemon and proxy +constexpr auto proxy_daemon_write = "proxy_daemon_rpc_srv_write_data"; +constexpr auto proxy_daemon_read = "proxy_daemon_rpc_srv_read_data"; } // namespace tag namespace protocol { diff --git a/include/common/rpc/rpc_types.hpp 
b/include/common/rpc/rpc_types.hpp index b21a50006..8e4eacf52 100644 --- a/include/common/rpc/rpc_types.hpp +++ b/include/common/rpc/rpc_types.hpp @@ -126,4 +126,40 @@ MERCURY_GEN_PROC( ((hg_int32_t) (err))((hg_uint64_t) (chunk_size))( (hg_uint64_t) (chunk_total))((hg_uint64_t) (chunk_free))) +// client <-> proxy +MERCURY_GEN_PROC(rpc_client_proxy_write_in_t, + ((hg_const_string_t) (path))( + (int64_t) (offset)) // file offset, NOT chunk offset + ((hg_uint64_t) (write_size))((hg_bulk_t) (bulk_handle))) + +MERCURY_GEN_PROC(rpc_client_proxy_read_in_t, + ((hg_const_string_t) (path))( + (int64_t) (offset)) // file offset, NOT chunk offset + ((hg_uint64_t) (read_size))((hg_bulk_t) (bulk_handle))) + +// proxy <-> daemon + +MERCURY_GEN_PROC( + rpc_proxy_daemon_write_in_t, + ((hg_const_string_t) (path))((int64_t) (offset))( + (hg_uint64_t) (host_id))((hg_uint64_t) (host_size))( + (hg_uint64_t) (chunk_n))((hg_uint64_t) (chunk_start))( + (hg_uint64_t) (chunk_end))((hg_uint64_t) (total_chunk_size))( + (hg_bulk_t) (bulk_handle))) + +MERCURY_GEN_PROC( + rpc_proxy_daemon_read_in_t, + ((hg_const_string_t) (path))((int64_t) (offset))( + (hg_uint64_t) (host_id))((hg_uint64_t) (host_size))( + (hg_uint64_t) (chunk_n))((hg_uint64_t) (chunk_start))( + (hg_uint64_t) (chunk_end))((hg_uint64_t) (total_chunk_size))( + (hg_bulk_t) (bulk_handle))) + +MERCURY_GEN_PROC(rpc_proxy_test_in_t, ((hg_const_string_t) (path))) + +MERCURY_GEN_PROC(rpc_proxy_get_dirents_in_t, + ((hg_const_string_t) (path))((int32_t) (server))( + (hg_bulk_t) (bulk_handle))) + + #endif // LFS_RPC_TYPES_HPP diff --git a/include/config.hpp b/include/config.hpp index edf7b2c69..a2ba8ecf7 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -35,9 +35,13 @@ #define CLIENT_ENV_PREFIX "LIBGKFS_" #define DAEMON_ENV_PREFIX "GKFS_DAEMON_" #define COMMON_ENV_PREFIX "GKFS_" +#define PROXY_ENV_PREFIX "GKFS_PROXY_" namespace gkfs::config { +// writes to dev null instead of chunk space, read is reading /dev/zero +constexpr bool 
limbo_mode = false; + constexpr auto hostfile_path = "./gkfs_hosts.txt"; // We do not default this, ENV variable always required. constexpr auto forwarding_file_path = ""; @@ -56,14 +60,28 @@ namespace io { * If buffer is not zeroed, sparse regions contain invalid data. */ constexpr auto zero_buffer_before_read = false; +/* + * When the daemon handler serves a read request, it starts tasklets (for each + * chunk) from the io pool to read all chunks of that read request in parallel. + * Then another thread is waiting for the first tasklet to finish before + * initiating the bulk transfer back to the client for this chunk. + * This will continue in sequence, allowing gaps between bulk transfers while + * waiting. Although this is CPU efficient, it does not provide the highest I/O. + * If spin_lock_read is enabled, all tasklets are polled for completion + * regardless of their order, minimizing the gap between bulk transfers. + * Due to spinning in a loop, this increases CPU utilization. + */ +constexpr auto spin_lock_read = true; } // namespace io namespace log { constexpr auto client_log_path = "/tmp/gkfs_client.log"; constexpr auto daemon_log_path = "/tmp/gkfs_daemon.log"; +constexpr auto proxy_log_path = "/tmp/gkfs_proxy.log"; constexpr auto client_log_level = "info,errors,critical,hermes"; constexpr auto daemon_log_level = 4; // info +constexpr auto proxy_log_level = 4; // info } // namespace log namespace metadata { @@ -99,6 +117,20 @@ namespace data { constexpr auto chunk_dir = "chunks"; } // namespace data +namespace proxy { +constexpr auto pid_path = "/tmp/gkfs_proxy.pid"; +constexpr auto fwd_create = true; +constexpr auto fwd_stat = true; +constexpr auto fwd_remove = true; +constexpr auto fwd_update_size = true; +constexpr auto fwd_io = true; +constexpr auto fwd_chunk_stat = true; +constexpr auto fwd_get_dirents_single = true; +// Only use proxy for io if write/read size is higher than set value +constexpr auto fwd_io_count_threshold = 0; + +} //
namespace proxy + namespace rpc { constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB) // size of preallocated buffer to hold directory entries in rpc call @@ -111,6 +143,8 @@ constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega constexpr auto daemon_io_xstreams = 8; // Number of threads used for RPC handlers at the daemon constexpr auto daemon_handler_xstreams = 4; +// Number of threads used for RPC handlers at the proxy +constexpr auto proxy_handler_xstreams = 3; } // namespace rpc namespace rocksdb { diff --git a/include/daemon/classes/fs_data.hpp b/include/daemon/classes/fs_data.hpp index ef89b5e28..4cde4170e 100644 --- a/include/daemon/classes/fs_data.hpp +++ b/include/daemon/classes/fs_data.hpp @@ -69,7 +69,10 @@ private: // RPC management std::string rpc_protocol_{}; + std::string proxy_rpc_protocol_{}; std::string bind_addr_{}; + std::string bind_proxy_addr_{}; // optional when used with running proxy. + // Remains empty if unused std::string hosts_file_{}; bool use_auto_sm_; @@ -172,12 +175,23 @@ public: void rpc_protocol(const std::string& rpc_protocol); + const std::string& + proxy_rpc_protocol() const; + void + proxy_rpc_protocol(const std::string& proxy_rpc_protocol); + const std::string& bind_addr() const; void bind_addr(const std::string& addr); + const std::string& + bind_proxy_addr() const; + + void + bind_proxy_addr(const std::string& proxy_addr); + const std::string& hosts_file() const; diff --git a/include/daemon/classes/rpc_data.hpp b/include/daemon/classes/rpc_data.hpp index db563982a..c98b7d3ad 100644 --- a/include/daemon/classes/rpc_data.hpp +++ b/include/daemon/classes/rpc_data.hpp @@ -49,11 +49,13 @@ private: // Margo IDs. 
They can also be used to retrieve the Mercury classes and // contexts that were created at init time margo_instance_id server_rpc_mid_; + margo_instance_id proxy_server_rpc_mid_; // Argobots I/O pools and execution streams ABT_pool io_pool_; std::vector io_streams_; std::string self_addr_str_; + std::string self_proxy_addr_str_; // Distributor std::shared_ptr distributor_; @@ -77,6 +79,12 @@ public: void server_rpc_mid(margo_instance* server_rpc_mid); + margo_instance* + proxy_server_rpc_mid(); + + void + proxy_server_rpc_mid(margo_instance* proxy_server_rpc_mid); + ABT_pool io_pool() const; @@ -93,7 +101,13 @@ public: self_addr_str() const; void - self_addr_str(const std::string& addr_stra); + self_addr_str(const std::string& addr_str); + + const std::string& + self_proxy_addr_str() const; + + void + self_proxy_addr_str(const std::string& proxy_addr_str); const std::shared_ptr& distributor() const; diff --git a/include/daemon/handler/rpc_defs.hpp b/include/daemon/handler/rpc_defs.hpp index 0981a4eba..371094966 100644 --- a/include/daemon/handler/rpc_defs.hpp +++ b/include/daemon/handler/rpc_defs.hpp @@ -37,8 +37,7 @@ extern "C" { #include } -/* visible API for RPC operations */ - +// client <-> daemon RPCs DECLARE_MARGO_RPC_HANDLER(rpc_srv_get_fs_config) DECLARE_MARGO_RPC_HANDLER(rpc_srv_create) @@ -64,7 +63,6 @@ DECLARE_MARGO_RPC_HANDLER(rpc_srv_mk_symlink) #endif - // data DECLARE_MARGO_RPC_HANDLER(rpc_srv_remove_data) @@ -76,4 +74,14 @@ DECLARE_MARGO_RPC_HANDLER(rpc_srv_truncate) DECLARE_MARGO_RPC_HANDLER(rpc_srv_get_chunk_stat) +// proxy <-> daemon RPCs +DECLARE_MARGO_RPC_HANDLER(rpc_srv_proxy_write) + +DECLARE_MARGO_RPC_HANDLER(rpc_srv_proxy_read) + +// client <-> proxy RPCs +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_write) + #endif // GKFS_DAEMON_RPC_DEFS_HPP diff --git a/include/daemon/ops/data.hpp b/include/daemon/ops/data.hpp index bb25ee22f..18d6daabd 100644 --- a/include/daemon/ops/data.hpp +++ 
b/include/daemon/ops/data.hpp @@ -302,7 +302,8 @@ private: size_t size; //!< size to read from chunk off64_t off; //!< offset for individual chunk ABT_eventual eventual; //!< Attached eventual - }; //!< Struct for an chunk read operation + bool bulk_transfer_done = false; + }; //!< Struct for an chunk read operation std::vector task_args_; //!< tasklet input structs /** diff --git a/include/proxy/CMakeLists.txt b/include/proxy/CMakeLists.txt new file mode 100644 index 000000000..5d1772f0e --- /dev/null +++ b/include/proxy/CMakeLists.txt @@ -0,0 +1,41 @@ +################################################################################ +# Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . 
# +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +target_sources( + gkfs_proxy + PUBLIC proxy.hpp + env.hpp + proxy.hpp + proxy_data.hpp + util.hpp + rpc/forward_data.hpp + rpc/forward_metadata.hpp + rpc/rpc_defs.hpp + rpc/rpc_util.hpp + +) \ No newline at end of file diff --git a/include/proxy/env.hpp b/include/proxy/env.hpp new file mode 100644 index 000000000..4e6db83a2 --- /dev/null +++ b/include/proxy/env.hpp @@ -0,0 +1,25 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_PROXY_ENV_HPP +#define GEKKOFS_PROXY_ENV_HPP + +#include + +#define ADD_PREFIX(str) PROXY_ENV_PREFIX str + +/* Environment variables for the GekkoFS proxy */ +namespace gkfs::env {} // namespace gkfs::env + + +#endif // GEKKOFS_PROXY_ENV_HPP diff --git a/include/proxy/proxy.hpp b/include/proxy/proxy.hpp new file mode 100644 index 000000000..34bea585b --- /dev/null +++ b/include/proxy/proxy.hpp @@ -0,0 +1,36 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ +#ifndef GEKKOFS_PROXY_PROXY_HPP +#define GEKKOFS_PROXY_PROXY_HPP + +// std libs +#include +#include + +#include +#include + +// margo +extern "C" { +#include +#include +#include +} + +#include + +#define PROXY_DATA \ + (static_cast( \ + gkfs::proxy::ProxyData::getInstance())) + +#endif // GEKKOFS_PROXY_PROXY_HPP diff --git a/include/proxy/proxy_data.hpp b/include/proxy/proxy_data.hpp new file mode 100644 index 000000000..7122a449f --- /dev/null +++ b/include/proxy/proxy_data.hpp @@ -0,0 +1,146 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_PROXY_PROXY_DATA_HPP +#define GEKKOFS_PROXY_PROXY_DATA_HPP + +#include +#include + + +namespace gkfs { +namespace rpc { +class Distributor; +} +namespace proxy { + +struct margo_client_ids { + hg_id_t rpc_create_id; + hg_id_t rpc_stat_id; + hg_id_t rpc_remove_id; + hg_id_t rpc_remove_data_id; + hg_id_t rpc_update_metadentry_size_id; + hg_id_t rpc_write_id; + hg_id_t rpc_read_id; + hg_id_t rpc_chunk_stat_id; + hg_id_t rpc_get_dirents_extended_id; +}; + +class ProxyData { + +private: + ProxyData() {} + + // logger + std::shared_ptr spdlogger_{}; + + // RPC stuff + margo_instance_id client_rpc_mid_{}; + margo_instance_id server_ipc_mid_{}; + std::string server_self_addr_{}; + + bool use_auto_sm_{false}; + + std::map rpc_endpoints_; + uint64_t hosts_size_; + uint64_t local_host_id_; + + margo_client_ids rpc_client_ids_{}; + + // pid file + std::string pid_file_path_{gkfs::config::proxy::pid_path}; + + // data distribution + std::shared_ptr distributor_; + +public: + static ProxyData* + getInstance() { + static 
ProxyData instance; + return &instance; + } + + ProxyData(ProxyData const&) = delete; + + void + operator=(ProxyData const&) = delete; + + // Getter/Setter + + const std::shared_ptr& + log() const; + + void + log(const std::shared_ptr& log); + + margo_instance* + client_rpc_mid(); + + void + client_rpc_mid(margo_instance* client_rpc_mid); + + margo_instance* + server_ipc_mid(); + + void + server_ipc_mid(margo_instance* server_ipc_mid); + + const std::string& + server_self_addr() const; + + void + server_self_addr(const std::string& server_self_addr); + + bool + use_auto_sm() const; + void + use_auto_sm(bool use_auto_sm); + + std::map& + rpc_endpoints(); + + void + rpc_endpoints(const std::map& rpc_endpoints); + + uint64_t + hosts_size() const; + + void + hosts_size(uint64_t hosts_size); + + uint64_t + local_host_id() const; + + void + local_host_id(uint64_t local_host_id); + + margo_client_ids& + rpc_client_ids(); + + const std::string& + pid_file_path() const; + + void + pid_file_path(const std::string& pid_file_path); + + void + distributor(std::shared_ptr distributor); + + std::shared_ptr + distributor() const; +}; + +} // namespace proxy +} // namespace gkfs + +#endif // GEKKOFS_PROXY_PROXY_DATA_HPP \ No newline at end of file diff --git a/include/proxy/rpc/forward_data.hpp b/include/proxy/rpc/forward_data.hpp new file mode 100644 index 000000000..85f3cd63a --- /dev/null +++ b/include/proxy/rpc/forward_data.hpp @@ -0,0 +1,36 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_PROXY_FWD_DATA_HPP +#define GEKKOFS_PROXY_FWD_DATA_HPP + +#include + +namespace gkfs { +namespace rpc { + +std::pair +forward_write(const std::string& path, void* buf, int64_t offset, + size_t write_size); + +std::pair +forward_read(const std::string& path, void* buf, int64_t offset, + size_t read_size); + +std::pair +forward_get_chunk_stat(); + +} // namespace rpc +} // namespace gkfs + +#endif // GEKKOFS_PROXY_FWD_DATA_HPP diff --git a/include/proxy/rpc/forward_metadata.hpp b/include/proxy/rpc/forward_metadata.hpp new file mode 100644 index 000000000..77fe00f1e --- /dev/null +++ b/include/proxy/rpc/forward_metadata.hpp @@ -0,0 +1,41 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_PROXY_FORWARD_METADATA_HPP +#define GEKKOFS_PROXY_FORWARD_METADATA_HPP + +#include + +namespace gkfs::rpc { + +int +forward_create(const std::string& path, const mode_t mode); + +std::pair +forward_stat(const std::string& path); + +int +forward_remove(const std::string& path); + +std::pair +forward_update_metadentry_size(const std::string& path, const size_t size, + const off64_t offset, const bool append_flag); + +std::pair +forward_get_dirents_single(const std::string& path, int server, void* buf, + const size_t bulk_size); + +} // namespace gkfs::rpc + + +#endif // GEKKOFS_PROXY_FORWARD_METADATA_HPP diff --git a/include/proxy/rpc/rpc_defs.hpp b/include/proxy/rpc/rpc_defs.hpp new file mode 100644 index 000000000..e2d83e9e3 --- /dev/null +++ b/include/proxy/rpc/rpc_defs.hpp @@ -0,0 +1,40 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + + +#ifndef GKFS_PROXY_RPC_DEFS_HPP +#define GKFS_PROXY_RPC_DEFS_HPP + +extern "C" { +#include +} + +/* visible API for RPC operations */ + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_create) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_stat) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_remove) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_update_metadentry_size) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_write) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_chunk_stat) + +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_get_dirents_extended) + +#endif // GKFS_PROXY_RPC_DEFS_HPP diff --git a/include/proxy/rpc/rpc_util.hpp b/include/proxy/rpc/rpc_util.hpp new file mode 100644 index 000000000..23cd37dfa --- /dev/null +++ b/include/proxy/rpc/rpc_util.hpp @@ -0,0 +1,107 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_PROXY_RPC_UTIL_HPP +#define GEKKOFS_PROXY_RPC_UTIL_HPP + +extern "C" { +#include +#include +#include +} + +#include + +namespace gkfs { +namespace rpc { + +// TODO: These helpers duplicate the daemon's RPC cleanup/respond utilities. +// Refactor both copies into a shared common component before merging. +// NOTE: Consider a more descriptive file name as part of that refactor.
+ +template +inline hg_return_t +cleanup(hg_handle_t* handle, InputType* input, OutputType* output, + hg_bulk_t* bulk_handle) { + auto ret = HG_SUCCESS; + if(bulk_handle) { + ret = margo_bulk_free(*bulk_handle); + if(ret != HG_SUCCESS) + return ret; + } + if(input && handle) { + ret = margo_free_input(*handle, input); + if(ret != HG_SUCCESS) + return ret; + } + if(output && handle) { + ret = margo_free_output(*handle, output); + if(ret != HG_SUCCESS) + return ret; + } + if(handle) { + ret = margo_destroy(*handle); + if(ret != HG_SUCCESS) + return ret; + } + return ret; +} + +template +inline hg_return_t +respond(hg_handle_t* handle, OutputType* output) { + auto ret = HG_SUCCESS; + if(output && handle) { + ret = margo_respond(*handle, output); + if(ret != HG_SUCCESS) + return ret; + } + return ret; +} + +template +inline hg_return_t +cleanup_respond(hg_handle_t* handle, InputType* input, OutputType* output, + hg_bulk_t* bulk_handle) { + auto ret = respond(handle, output); + if(ret != HG_SUCCESS) + return ret; + return cleanup(handle, input, static_cast(nullptr), + bulk_handle); +} + +template +inline hg_return_t +cleanup_respond(hg_handle_t* handle, InputType* input, OutputType* output) { + return cleanup_respond(handle, input, output, nullptr); +} + +template +inline hg_return_t +cleanup_respond(hg_handle_t* handle, OutputType* output) { + auto ret = respond(handle, output); + if(ret != HG_SUCCESS) + return ret; + if(handle) { + ret = margo_destroy(*handle); + if(ret != HG_SUCCESS) + return ret; + } + return ret; +} + +} // namespace rpc +} // namespace gkfs + + +#endif // GEKKOFS_PROXY_RPC_UTIL_HPP diff --git a/include/proxy/util.hpp b/include/proxy/util.hpp new file mode 100644 index 000000000..02d1e8a39 --- /dev/null +++ b/include/proxy/util.hpp @@ -0,0 +1,44 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 
funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GEKKOFS_PROXY_UTIL_HPP +#define GEKKOFS_PROXY_UTIL_HPP + +#include +#include + +namespace gkfs { +namespace util { + +bool +is_proxy_already_running(); + +void +create_proxy_pid_file(); + +void +remove_proxy_pid_file(); + +bool +check_for_hosts_file(const std::string& hostfile); + +std::vector> +read_hosts_file(const std::string& hostfile); + +void +connect_to_hosts(const std::vector>& hosts); + +} // namespace util +} // namespace gkfs + +#endif // GEKKOFS_PROXY_UTIL_HPP diff --git a/marc/CMakeLists.txt b/marc/CMakeLists.txt new file mode 100644 index 000000000..1674ac495 --- /dev/null +++ b/marc/CMakeLists.txt @@ -0,0 +1,24 @@ +set(MARC_SRC + main.cpp +) +set(MARC_HEADERS +) + +add_executable(marc_test ${MARC_SRC} ${MARC_HEADERS}) + +target_link_libraries(marc_test + PUBLIC + # gkfs_intercept + # internal libs + # spdlog + # fmt::fmt + # others + # Threads::Threads + PRIVATE + # open issue for std::filesystem https://gitlab.kitware.com/cmake/cmake/-/issues/17834 + stdc++fs +) + +target_include_directories(marc_test + PRIVATE +) diff --git a/marc/main.cpp b/marc/main.cpp new file mode 100644 index 000000000..2da8f0c29 --- /dev/null +++ b/marc/main.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include + +using namespace std; + +using ns = chrono::nanoseconds; +using get_time = chrono::steady_clock; + +int +main(int argc, char* argv[]) { + + auto filen = atoi(argv[1]); + + // cout << mkdir("/tmp/mountdir/bla", 0775) << endl; + // auto buf = "BUFFERINO2"; + // struct stat attr; + // cout << creat("/tmp/mountdir/creat.txt", 0666) << endl; + // cout << + // creat("/tmp/mountdir/#test-dir.0/mdtest_tree.0/file.mdtest.0000000.0000000005", + // 0666) << endl; cout << stat("/tmp/mountdir/creat.txt", &attr) << endl; + // cout << 
unlink("/tmp/mountdir/creat.txt") << endl; + + + auto start_t = get_time::now(); + int fd; + for(int i = 0; i < filen; ++i) { + string p = "/dev/shm/vef_gkfs_mountdir/file" + to_string(i); + fd = creat(p.c_str(), 0666); + if(i % 25000 == 0) + cout << i << " files processed." << endl; + close(fd); + } + + auto end_t = get_time::now(); + auto diff = end_t - start_t; + + auto diff_count = chrono::duration_cast(diff).count(); + + cout << diff_count << "ns\t" << (diff_count) / 1000000. << "ms" << endl; + cout << filen / ((diff_count) / 1000000000.) << " files per second" << endl; + + return 0; +} \ No newline at end of file diff --git a/scripts/run/gkfs b/scripts/run/gkfs index f097d81fc..8e93a3b86 100755 --- a/scripts/run/gkfs +++ b/scripts/run/gkfs @@ -1,4 +1,20 @@ #!/bin/bash +# colors and shell styles +# check for color support and set variables +if [ "$(tput colors)" -gt 2 ]; then + C_GREEN='\033[0;32m' # green + C_BYELLOW='\033[1;33m' # Bold yellow + C_BRED='\033[1;31m' # Bold red + C_NONE='\033[0m' # No color +else + C_GREEN='' + C_BYELLOW='' + C_BRED='' + C_NONE='' +fi +C_AST_GREEN="${C_GREEN}*${C_NONE} [gkfs] " +C_AST_YELLOW="${C_BYELLOW}*${C_NONE} [gkfs] " +C_AST_RED="${C_BRED}*${C_NONE} [gkfs] " ####################################### # Poll GekkoFS hostsfile until all daemons are started. # Exits with 1 if daemons cannot be started. @@ -23,7 +39,7 @@ wait_for_gkfs_daemons() { sleep 2 server_wait_cnt=$((server_wait_cnt+1)) if [ ${server_wait_cnt} -gt 600 ]; then - echo "Server failed to start. Exiting ..." + echo -e "${C_AST_RED}Server failed to start. Exiting ..." exit 1 fi done @@ -32,7 +48,7 @@ wait_for_gkfs_daemons() { # Creates a pid file for a given pid. If pid file exists, we check if its pids are still valid. # If valid, an additional line is added. Otherwise, the pid in the file is deleted. 
# Globals: -# DAEMON_PID_FILE +# SRUN_DAEMON_PID_FILE # VERBOSE # Arguments: # pid to write to pid file @@ -40,14 +56,14 @@ wait_for_gkfs_daemons() { # Writes status to stdout if VERBOSE is true ####################################### create_pid_file() { - local pid_file=${DAEMON_PID_FILE} + local pid_file=${SRUN_DAEMON_PID_FILE} local pid=${1} if [[ ${VERBOSE} == true ]]; then - echo "Creating pid file at ${pid_file} with pid ${pid} ..." + echo -e "${C_AST_GREEN}Creating pid file at ${pid_file} with pid ${pid} ..." fi # if PID file exists another daemon could run if [[ -e ${pid_file} ]]; then - local pid_file_tmp=${DAEMON_PID_FILE}.swp + local pid_file_tmp=${SRUN_DAEMON_PID_FILE}.swp # create empty tmp file truncate -s 0 "${pid_file_tmp}" while IFS= read -r line @@ -69,10 +85,13 @@ create_pid_file() { # NODE_NUM # MOUNTDIR # ROOTDIR -# ARGS +# DAEMON_ARGS_ +# PROXY_ARGS_ # CPUS_PER_TASK # VERBOSE -# USE_NUMACTL +# DAEMON_NUMACTL_ +# PROXY_NUMACTL_ +# USE_PROXY # DAEMON_CPUNODEBIND # DAEMON_MEMBIND # GKFS_DAEMON_LOG_PATH @@ -85,6 +104,7 @@ start_daemon() { local node_list local srun_cmd local daemon_execute + local proxy_execute # setup if [[ ${USE_SRUN} == true ]]; then node_list=$(scontrol show job "${SLURM_JOB_ID}" | grep " NodeList=" | cut -d "=" -f2) @@ -93,53 +113,106 @@ start_daemon() { fi # Setting up base srun cmd srun_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} " + else + NODE_NUM=1 fi if [[ ${VERBOSE} == true ]]; then - echo "### mountdir: ${MOUNTDIR}" - echo "### rootdir: ${ROOTDIR}" - echo "### node_num: ${NODE_NUM}" - echo "### additional daemon args: ${ARGS}" - echo "### cpus_per_task: ${CPUS_PER_TASK}" + echo -e "${C_AST_GREEN}mountdir: ${MOUNTDIR}" + echo -e "${C_AST_GREEN}rootdir: ${ROOTDIR}" + echo -e "${C_AST_GREEN}node_num: ${NODE_NUM}" + echo -e "${C_AST_GREEN}additional daemon args: ${DAEMON_ARGS_}" + echo -e "${C_AST_GREEN}cpus_per_task: ${CPUS_PER_TASK}" + [[ ${USE_PROXY} == 
true ]] && echo -e "${C_AST_GREEN}Proxy enabled" fi if [[ ${VERBOSE} == true ]]; then - echo "# Cleaning host file ..." + echo -e "${C_AST_GREEN}Cleaning host file ..." fi rm "${HOSTSFILE}" 2> /dev/null # Setting up base daemon cmd - local daemon_cmd="${DAEMON_BIN} -r ${ROOTDIR} -m ${MOUNTDIR} -H ${HOSTSFILE} ${ARGS}" + local daemon_cmd="${DAEMON_BIN} -r ${ROOTDIR} -m ${MOUNTDIR} -H ${HOSTSFILE} ${DAEMON_ARGS_}" + if [[ ${USE_PROXY} == true ]]; then + daemon_cmd="${daemon_cmd} ${DAEMON_PROXY_ARGS}" + fi # Setting up numactl - if [[ ${USE_NUMACTL} == true ]]; then + if [[ ${DAEMON_NUMACTL_} == true ]]; then daemon_cmd="numactl --cpunodebind=${DAEMON_CPUNODEBIND} --membind=${DAEMON_MEMBIND} ${daemon_cmd}" fi # final daemon execute command daemon_execute="${srun_cmd}${daemon_cmd}" + # Setting up base proxy command + if [[ ${USE_PROXY} == true ]]; then + local proxy_cmd="${PROXY_BIN} -H ${HOSTSFILE} --pid-path ${PROXY_LOCAL_PID_FILE} ${PROXY_ARGS_}" + # Setting up numactl + if [[ ${PROXY_NUMACTL_} == true ]]; then + proxy_cmd="numactl --cpunodebind=${PROXY_CPUNODEBIND} --membind=${PROXY_MEMBIND} ${proxy_cmd}" + fi + # final proxy execute command + proxy_execute="${srun_cmd}${proxy_cmd}" + fi + if [[ ${VERBOSE} == true ]]; then - echo "### Full execute DAEMON command:" - echo "##### $daemon_execute" + echo -e "${C_AST_GREEN}Full execute DAEMON command:" + echo -e "${C_AST_GREEN}# $daemon_execute" + [[ ${USE_PROXY} == true ]] && echo -e "${C_AST_GREEN}Full execute PROXY command:" + [[ ${USE_PROXY} == true ]] && echo -e "${C_AST_GREEN}# $proxy_execute" fi # setup environment variables export GKFS_DAEMON_LOG_PATH=$GKFS_DAEMON_LOG_PATH export GKFS_DAEMON_LOG_LEVEL=$GKFS_DAEMON_LOG_LEVEL + export GKFS_PROXY_LOG_PATH=$GKFS_PROXY_LOG_PATH + export GKFS_PROXY_LOG_LEVEL=$GKFS_PROXY_LOG_LEVEL - echo "Starting daemons ..." + echo -e "${C_AST_GREEN}Starting GekkoFS daemons (${NODE_NUM} nodes) ..." + start_time="$(date -u +%s.%3N)" ${daemon_execute} & local daemon_pid=$! 
wait_for_gkfs_daemons - echo "Running ..." + stop_time="$(date -u +%s.%3N)" + elapsed="$(bc <<<"$stop_time-$start_time")" + echo -e "${C_AST_GREEN}GekkoFS daemons running" + echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds" + + #if [[ ${USE_PROXY} == true ]]; then + # echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM} nodes) ..." + # start_time="$(date -u +%s.%3N)" + # ${proxy_execute} & + # local proxy_pid=$! + # sleep 5 # TODO + # stop_time="$(date -u +%s.%3N)" + # elapsed="$(bc <<<"$stop_time-$start_time")" + # echo -e "${C_AST_GREEN}GekkoFS daemons probably :) running" + # echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds" + #fi if [[ ${RUN_FOREGROUND} == true ]]; then echo "Press 'q' to exit" while : ; do read -n 1 k <&1 if [[ $k = q ]] ; then + #if [[ ${USE_PROXY} == true ]]; then + # start_time="$(date -u +%s.%3N)" + # echo + # echo -e "${C_AST_GREEN}Shutting down GekkoFS proxies ..." + # if [[ -n ${proxy_pid} ]]; then + # kill -s SIGINT ${proxy_pid} & + # wait ${proxy_pid} + # fi + # stop_time="$(date -u +%s.%3N)" + # elapsed="$(bc <<<"$stop_time-$start_time")" + # echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" + #fi + start_time="$(date -u +%s.%3N)" echo - echo "Shutting down ..." + echo -e "${C_AST_GREEN}Shutting down GekkoFS daemons ..." 
if [[ -n ${daemon_pid} ]]; then kill -s SIGINT ${daemon_pid} & wait ${daemon_pid} fi + stop_time="$(date -u +%s.%3N)" + elapsed="$(bc <<<"$stop_time-$start_time")" + echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" break else echo "Press 'q' to exit" @@ -147,36 +220,39 @@ start_daemon() { done else create_pid_file ${daemon_pid} +# create_pid_file ${proxy_pid} fi } ####################################### # Stops GekkoFS daemons for the configured pid file # Globals: -# DAEMON_PID_FILE +# SRUN_DAEMON_PID_FILE # VERBOSE # Outputs: # Writes status to stdout ####################################### stop_daemons() { - local pid_file=${DAEMON_PID_FILE} + local pid_file=${SRUN_DAEMON_PID_FILE} if [[ -e ${pid_file} ]]; then while IFS= read -r line do if ps -p "${line}" > /dev/null; then - if [[ ${VERBOSE} == true ]]; then - echo "Stopping daemon with pid ${line}" - fi + echo -e "${C_AST_GREEN}Stopping daemon with pid ${line}" + start_time="$(date -u +%s.%3N)" kill -s SIGINT "${line}" & # poll pid until it stopped if [[ ${VERBOSE} == true ]]; then - echo "Waiting for daemons to exit ..." + echo -e "${C_AST_GREEN}Waiting for daemons to exit ..." fi timeout 1 tail --pid=${line} -f /dev/null fi done < "${pid_file}" rm "${pid_file}" + stop_time="$(date -u +%s.%3N)" + elapsed="$(bc <<<"$stop_time-$start_time")" + echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" else - echo "No pid file found -> no daemon running. Exiting ..." + echo -e "${C_AST_RED}No pid file found -> no daemon running. Exiting ..." 
fi } ####################################### @@ -186,8 +262,8 @@ stop_daemons() { ####################################### usage_short() { echo " -usage: gkfs [-h/--help] [-r/--rootdir ] [-m/--mountdir ] [-a/--args ] [-f/--foreground ] - [--srun ] [-n/--numnodes ] [--cpuspertask <64>] [--numactl ] [-v/--verbose ] +usage: gkfs [-h/--help] [-r/--rootdir ] [-m/--mountdir ] [-a/--args ] [--proxy ] [-f/--foreground ] + [--srun ] [-n/--numnodes ] [--cpuspertask <64>] [--daemon_numactl ] [--proxy_numactl ] [-v/--verbose ] {start,stop} " } @@ -210,14 +286,17 @@ help_msg() { -h, --help Shows this help message and exits -r, --rootdir The rootdir path for GekkoFS daemons. -m, --mountdir The mountdir path for GekkoFS daemons. - -a, --args + -d, --daemon_args + --proxy Start proxy after the daemons are running. Add various additional daemon arguments, e.g., \"-l ib0 -P ofi+psm2\". + -p, --proxy_args -f, --foreground Starts the script in the foreground. Daemons are stopped by pressing 'q'. --srun Use srun to start daemons on multiple nodes. -n, --numnodes GekkoFS daemons are started on n nodes. Nodelist is extracted from Slurm via the SLURM_JOB_ID env variable. --cpuspertask <#cores> Set the number of cores the daemons can use. Must use '--srun'. - --numactl Use numactl for the daemon. Modify gkfs.conf for further numactl configurations. + --daemon_numactl Use numactl for the daemon. Modify gkfs.conf for further numactl configurations. + --proxy_numactl Use numactl for the proxy. Modify gkfs.conf for further numactl configurations. -c, --config Path to configuration file. By defaults looks for a 'gkfs.conf' in this directory. -v, --verbose Increase verbosity " @@ -241,22 +320,25 @@ if [[ -z ${CONFIGPATH} ]]; then CONFIGPATH="${SCRIPTDIR}/gkfs.conf" fi if [[ ! -f ${CONFIGPATH} ]]; then - >&2 echo ">> No config file found at '${CONFIGPATH}'." + >&2 echo -e "${C_AST_RED}>> No config file found at '${CONFIGPATH}'." 
exit 1 fi # get variables from CONFIGPATH source "$CONFIGPATH" # more global variables which may be overwritten by user input VERBOSE=false -NODE_NUM=1 +NODE_NUM="" MOUNTDIR=${DAEMON_MOUNTDIR} ROOTDIR=${DAEMON_ROOTDIR} HOSTSFILE=${LIBGKFS_HOSTS_FILE} CPUS_PER_TASK=$(grep -c ^processor /proc/cpuinfo) -ARGS=${DAEMON_ARGS} +DAEMON_ARGS_=${DAEMON_ARGS} +PROXY_ARGS_=${PROXY_ARGS} USE_SRUN=${USE_SRUN} RUN_FOREGROUND=false -USE_NUMACTL=${DAEMON_NUMACTL} +DAEMON_NUMACTL_=${DAEMON_NUMACTL} +PROXY_NUMACTL_=${PROXY_NUMACTL} +USE_PROXY=false # parse input POSITIONAL=() while [[ $# -gt 0 ]]; do @@ -278,11 +360,20 @@ while [[ $# -gt 0 ]]; do shift # past argument shift # past value ;; - -a | --args) - ARGS="${ARGS} $2" + -d | --daemon_args) + DAEMON_ARGS_="${DAEMON_ARGS_} $2" shift # past argument shift # past value ;; + -p | --proxy_args) + PROXY_ARGS_="${PROXY_ARGS_} $2" + shift # past argument + shift # past value + ;; + --proxy) + USE_PROXY=true + shift # past argument + ;; --srun) USE_SRUN=true shift # past argument @@ -291,8 +382,12 @@ while [[ $# -gt 0 ]]; do RUN_FOREGROUND=true shift # past argument ;; - --numactl) - USE_NUMACTL=true + --daemon_numactl) + DAEMON_NUMACTL_=true + shift # past argument + ;; + --proxy_numactl) + PROXY_NUMACTL_=true shift # past argument ;; --cpuspertask) @@ -323,14 +418,14 @@ set -- "${POSITIONAL[@]}" # restore positional parameters # positional arguments if [[ -z ${1+x} ]]; then - echo "ERROR: Positional arguments missing." + echo -e "${C_AST_RED}ERROR: Positional arguments missing." usage_short exit 1 fi command="${1}" # checking input if [[ ${command} != *"start"* ]] && [[ ${command} != *"stop"* ]]; then - echo "ERROR: command ${command} not supported" + echo -e "${C_AST_RED}ERROR: command ${command} not supported" usage_short exit 1 fi @@ -341,5 +436,5 @@ elif [[ ${command} == "stop" ]]; then stop_daemons fi if [[ ${VERBOSE} == true ]]; then - echo "Nothing left to do. 
Exiting :)" -fi \ No newline at end of file + echo -e "${C_AST_GREEN}Nothing left to do. Exiting :)" +fi diff --git a/scripts/run/gkfs_io500_proxy.conf b/scripts/run/gkfs_io500_proxy.conf new file mode 100644 index 000000000..2b636463b --- /dev/null +++ b/scripts/run/gkfs_io500_proxy.conf @@ -0,0 +1,44 @@ +#!/bin/bash + +# binaries (default for project_dir/build +PRELOAD_LIB=/lustre/miifs01/project/m2_zdvresearch/vef/io500/lib/libgkfs_intercept.so +DAEMON_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/io500/bin/gkfs_daemon +PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/io500/bin/gkfs_proxy + +## client configuration +LIBGKFS_HOSTS_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_hostfile + +## daemon configuration +#DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir +DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir +DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir +# additional daemon arguments (see `gkfs_daemon -h`) +# use numactl to pin daemon to socket +DAEMON_ARGS="-l ib0 -c" +DAEMON_NUMACTL=true +DAEMON_CPUNODEBIND="1" +DAEMON_MEMBIND="1" + +## proxy configuration +DAEMON_PROXY_ARGS="--proxy-listen ib0 --proxy-protocol ofi+sockets" +PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid +PROXY_ARGS="" +PROXY_NUMACTL=true +PROXY_CPUNODEBIND="0" +PROXY_MEMBIND="0" + +## slurm configuration +# Use Slurm's srun to start the daemons on multiple nodes and set specific srun args +USE_SRUN=true +SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0" +# path to daemon pid file; created where the script is run +SRUN_DAEMON_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_daemon.pid +SRUN_PROXY_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_proxy.pid # TODO + +# logging configuration +GKFS_DAEMON_LOG_LEVEL=info +GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log +GKFS_PROXY_LOG_LEVEL=info +GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log +LIBGKFS_LOG=errors,warnings 
+LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 56654c8da..2aef9f5c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,4 +32,6 @@ add_subdirectory(common) add_subdirectory(daemon) # Client library add_subdirectory(client) +# Proxy +add_subdirectory(proxy) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 780899a18..d2415233d 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -49,8 +49,10 @@ target_sources(gkfs_intercept preload_util.cpp rpc/rpc_types.cpp rpc/forward_data.cpp + rpc/forward_data_proxy.cpp rpc/forward_management.cpp rpc/forward_metadata.cpp + rpc/forward_metadata_proxy.cpp syscalls/detail/syscall_info.c) target_sources( @@ -67,8 +69,10 @@ target_sources( preload_util.cpp rpc/rpc_types.cpp rpc/forward_data.cpp + rpc/forward_data_proxy.cpp rpc/forward_management.cpp rpc/forward_metadata.cpp + rpc/forward_metadata_proxy.cpp syscalls/detail/syscall_info.c syscalls/util.S ) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index e08745136..0a4c14264 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -33,7 +33,9 @@ #include #include #include +#include #include +#include #include #include @@ -299,24 +301,33 @@ gkfs_create(const std::string& path, mode_t mode) { return -1; } - if(check_parent_dir(path)) { - return -1; - } - // Write to all replicas, at least one need to success - bool success = false; - for(auto copy = 0; copy < CTX->get_replicas() + 1; copy++) { - auto err = gkfs::rpc::forward_create(path, mode, copy); + // if(check_parent_dir(path)) { + // return -1; + // } + int err = 0; + if(gkfs::config::proxy::fwd_create && CTX->use_proxy()) { + // no replication support for proxy + err = gkfs::rpc::forward_create_proxy(path, mode); if(err) { errno = err; - } else { - success = true; - errno = 0; + return -1; + } + } else { + // Write to all replicas, 
at least one need to success + bool success = false; + for(auto copy = 0; copy < CTX->get_replicas() + 1; copy++) { + err = gkfs::rpc::forward_create(path, mode, copy); + if(err) { + errno = err; + } else { + success = true; + errno = 0; + } + } + if(!success) { + return -1; } } - if(!success) { - return -1; - } - return 0; } @@ -363,8 +374,12 @@ gkfs_remove(const std::string& path) { } #endif // HAS_RENAME #endif // HAS_SYMLINKS - - auto err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); + int err = 0; + if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { + err = gkfs::rpc::forward_remove_proxy(path); + } else { + err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); + } if(err) { errno = err; return -1; @@ -593,8 +608,12 @@ gkfs_statx(int dirfs, const std::string& path, int flags, unsigned int mask, */ int gkfs_statfs(struct statfs* buf) { - - auto ret = gkfs::rpc::forward_get_chunk_stat(); + pair ret; + if(gkfs::config::proxy::fwd_chunk_stat && CTX->use_proxy()) { + ret = gkfs::rpc::forward_get_chunk_stat_proxy(); + } else { + ret = gkfs::rpc::forward_get_chunk_stat(); + } auto err = ret.first; if(err) { LOG(ERROR, "{}() Failure with error: '{}'", err); @@ -887,8 +906,14 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count, auto write_size = 0; auto num_replicas = CTX->get_replicas(); - auto ret_offset = gkfs::rpc::forward_update_metadentry_size( - *path, count, offset, is_append, num_replicas); + pair ret_offset; + if(gkfs::config::proxy::fwd_update_size && CTX->use_proxy()) { + ret_offset = gkfs::rpc::forward_update_metadentry_size_proxy( + *path, count, offset, is_append); + } else { + ret_offset = gkfs::rpc::forward_update_metadentry_size( + *path, count, offset, is_append, num_replicas); + } auto err = ret_offset.first; if(err) { LOG(ERROR, "update_metadentry_size() failed with err '{}'", err); @@ -910,7 +935,13 @@ gkfs_do_write(gkfs::filemap::OpenFile& file, const char* buf, size_t count, offset = 
ret_offset.second; } - auto ret_write = gkfs::rpc::forward_write(*path, buf, offset, count, 0); + pair ret_write; + if(gkfs::config::proxy::fwd_io && CTX->use_proxy() && + count > gkfs::config::proxy::fwd_io_count_threshold) { + ret_write = gkfs::rpc::forward_write_proxy(*path, buf, offset, count); + } else { + ret_write = gkfs::rpc::forward_write(*path, buf, offset, count, 0); + } err = ret_write.first; write_size = ret_write.second; @@ -1085,24 +1116,29 @@ gkfs_do_read(const gkfs::filemap::OpenFile& file, char* buf, size_t count, if constexpr(gkfs::config::io::zero_buffer_before_read) { memset(buf, 0, sizeof(char) * count); } - std::pair ret; - std::set failed; // set with failed targets. - if(CTX->get_replicas() != 0) { - ret = gkfs::rpc::forward_read(file.path(), buf, offset, count, - CTX->get_replicas(), failed); - while(ret.first == EIO) { + pair ret; + if(gkfs::config::proxy::fwd_io && CTX->use_proxy() && + count > gkfs::config::proxy::fwd_io_count_threshold) { + ret = gkfs::rpc::forward_read_proxy(file.path(), buf, offset, count); + } else { + std::set failed; // set with failed targets. 
+ if(CTX->get_replicas() != 0) { + ret = gkfs::rpc::forward_read(file.path(), buf, offset, count, CTX->get_replicas(), failed); - LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", - ret.first); - } + while(ret.first == EIO) { + ret = gkfs::rpc::forward_read(file.path(), buf, offset, count, + CTX->get_replicas(), failed); + LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", + ret.first); + } - } else { - ret = gkfs::rpc::forward_read(file.path(), buf, offset, count, 0, - failed); + } else { + ret = gkfs::rpc::forward_read(file.path(), buf, offset, count, 0, + failed); + } } - auto err = ret.first; if(err) { LOG(WARNING, "gkfs::rpc::forward_read() failed with ret '{}'", err); @@ -1586,7 +1622,14 @@ extern "C" int gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, unsigned int count, int server) { - auto ret = gkfs::rpc::forward_get_dirents_single(path, server); + pair, bool, size_t, time_t>>> + ret{}; + if(gkfs::config::proxy::fwd_get_dirents_single && CTX->use_proxy()) { + ret = gkfs::rpc::forward_get_dirents_single_proxy(path, server); + } else { + ret = gkfs::rpc::forward_get_dirents_single(path, server); + } + auto err = ret.first; if(err) { errno = err; diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 46b043678..1c3c3390b 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -983,6 +983,20 @@ hook_getxattr(const char* path, const char* name, void* value, size_t size) { return syscall_no_intercept_wrapper(SYS_getxattr, path, name, value, size); } +int +hook_lgetxattr(const char* path, const char* name, void* value, size_t size) { + + LOG(DEBUG, "{}() called with path '{}' name '{}' value '{}' size '{}'", + __func__, path, name, fmt::ptr(value), size); + + std::string rel_path; + if(CTX->relativize_path(path, rel_path)) { + return -ENOTSUP; + } + return syscall_no_intercept_wrapper(SYS_lgetxattr, path, name, value, + size); +} + int hook_fallocate(int fd, int mode, off_t offset, off_t len) { 
diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 87792bfbd..ba35eb080 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -785,6 +785,13 @@ hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, reinterpret_cast(arg2), static_cast(arg4)); break; + case SYS_lgetxattr: + *result = gkfs::hook::hook_lgetxattr( + reinterpret_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2), static_cast(arg4)); + break; + case SYS_fallocate: *result = gkfs::hook::hook_fallocate( static_cast(arg0), static_cast(arg1), diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 6865887ec..32a52ad56 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -50,6 +50,7 @@ using namespace std; std::unique_ptr ld_network_service; // extern variable +std::unique_ptr ld_proxy_service; // extern variable namespace { @@ -104,6 +105,21 @@ init_hermes_client() { ex.what()); return false; } + if(CTX->use_proxy()) { + try { + LOG(INFO, "Initializing IPC proxy subsystem..."); + hermes::engine_options opts{}; + ld_proxy_service = std::make_unique( + hermes::get_transport_type("na+sm"), opts, "", false, 1); + ld_proxy_service->run(); + } catch(const std::exception& ex) { + fmt::print(stderr, + "Failed to initialize Hermes IPC client for proxy {}\n", + ex.what()); + return false; + } + } + return true; } @@ -191,6 +207,9 @@ init_environment() { "Failed to load hosts addresses: "s + e.what()); } + LOG(INFO, "Checking for GKFS Proxy"); + gkfs::utils::check_for_proxy(); + // initialize Hermes interface to Mercury LOG(INFO, "Initializing RPC subsystem..."); @@ -200,6 +219,10 @@ init_environment() { try { gkfs::utils::connect_to_hosts(hosts); + if(CTX->use_proxy()) { + LOG(INFO, "Connecting to proxy..."); + gkfs::utils::lookup_proxy_addr(); + } } catch(const std::exception& e) { exit_error_msg(EXIT_FAILURE, "Failed to connect to hosts: "s + e.what()); @@ -327,6 +350,12 @@ destroy_preload() { CTX->clear_hosts(); 
LOG(DEBUG, "Peer information deleted"); + if(CTX->use_proxy()) { + CTX->clear_proxy_host(); + LOG(DEBUG, "Shutting down IPC subsystem"); + ld_proxy_service.reset(); + } + LOG(DEBUG, "Shutting down RPC subsystem"); ld_network_service.reset(); LOG(DEBUG, "RPC subsystem shut down"); diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index bbf9fab0b..701530959 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -350,6 +350,41 @@ PreloadContext::relativize_path(const char* raw_path, return gkfs::path::resolve(path, relative_path, resolve_last_link); } +const std::string& +PreloadContext::proxy_address_str() const { + return proxy_address_str_; +} + +void +PreloadContext::proxy_address_str(const std::string& proxy_address_str) { + proxy_address_str_ = proxy_address_str; +} + +const hermes::endpoint& +PreloadContext::proxy_host() const { + return proxy_host_; +} + +void +PreloadContext::proxy_host(const hermes::endpoint& proxy_host) { + proxy_host_ = proxy_host; +} + +void +PreloadContext::clear_proxy_host() { + proxy_host_ = {}; +} + +bool +PreloadContext::use_proxy() const { + return use_proxy_; +} + +void +PreloadContext::use_proxy(bool use_proxy) { + use_proxy_ = use_proxy; +} + const std::shared_ptr& PreloadContext::file_map() const { return ofm_; diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index b70ddf2d9..7c769b1cb 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -61,7 +62,8 @@ namespace { * @throws std::runtime_error */ hermes::endpoint -lookup_endpoint(const std::string& uri, std::size_t max_retries = 3) { +lookup_endpoint(const std::string& uri, bool use_proxy = false, + std::size_t max_retries = 3) { LOG(DEBUG, "Looking up address \"{}\"", uri); @@ -71,7 +73,10 @@ lookup_endpoint(const std::string& uri, std::size_t max_retries = 3) { do { try { - return 
ld_network_service->lookup(uri); + if(use_proxy) + return ld_proxy_service->lookup(uri); + else + return ld_network_service->lookup(uri); } catch(const exception& ex) { error_msg = ex.what(); @@ -149,7 +154,7 @@ load_hostfile(const std::string& path) { path, strerror(errno))); } vector> hosts; - const regex line_re("^(\\S+)\\s+(\\S+)$", + const regex line_re("^(\\S+)\\s+(\\S+)\\s*(\\S*)$", regex::ECMAScript | regex::optimize); string line; string host; @@ -199,26 +204,35 @@ namespace gkfs::utils { optional get_metadata(const string& path, bool follow_links) { std::string attr; - auto err = gkfs::rpc::forward_stat(path, attr, 0); - // TODO: retry on failure - - if(err) { - auto copy = 1; - while(copy < CTX->get_replicas() + 1 && err) { - LOG(ERROR, "Retrying Stat on replica {} {}", copy, follow_links); - err = gkfs::rpc::forward_stat(path, attr, copy); - copy++; - } + int err{}; + if(gkfs::config::proxy::fwd_stat && CTX->use_proxy()) { + err = gkfs::rpc::forward_stat_proxy(path, attr); + } else { + err = gkfs::rpc::forward_stat(path, attr, 0); + // TODO: retry on failure if(err) { - errno = err; - return {}; + auto copy = 1; + while(copy < CTX->get_replicas() + 1 && err) { + LOG(ERROR, "Retrying Stat on replica {} {}", copy, + follow_links); + err = gkfs::rpc::forward_stat(path, attr, copy); + copy++; + } } } + if(err) { + errno = err; + return {}; + } #ifdef HAS_SYMLINKS if(follow_links) { gkfs::metadata::Metadata md{attr}; while(md.is_link()) { - err = gkfs::rpc::forward_stat(md.target_path(), attr, 0); + if(gkfs::config::proxy::fwd_stat && CTX->use_proxy()) { + err = gkfs::rpc::forward_stat_proxy(md.target_path(), attr); + } else { + err = gkfs::rpc::forward_stat(md.target_path(), attr, 0); + } if(err) { errno = err; return {}; @@ -429,4 +443,71 @@ connect_to_hosts(const vector>& hosts) { CTX->hosts(addrs); } +/** + * Looks for a proxy pid file. If it exists, we set address string in preload + * context. 
+ */ +void +check_for_proxy() { + auto pid_path = gkfs::env::get_var(gkfs::env::PROXY_PID_FILE, + gkfs::config::proxy::pid_path); + ifstream ifs(pid_path, ::ifstream::in); + if(!ifs) { + LOG(INFO, "Proxy pid file NOT FOUND. Proxy will NOT be used!"); + return; + } + /* + * read two lines in pid file + * 1. line: process id (used to check for process existence) + * 2. line: na_sm address to connect to (which will be returned) + */ + if(ifs) { + // get PID + string running_pid; + if(getline(ifs, running_pid) && !running_pid.empty()) { + // check if process exists without killing it. Signal 0 doesn't + // kill + if(0 != ::kill(::stoi(running_pid), 0)) { + LOG(WARNING, + "Proxy pid file '{}' found but process with pid '{}' was not found. Will NOT use proxy", + pid_path, running_pid); + return; + } + } else { + LOG(WARNING, + "Proxy pid file '{}' first line is empty. Will NOT use proxy", + pid_path); + return; + } + // get proxy address + string proxy_address{}; + if(getline(ifs, proxy_address) && !proxy_address.empty()) { + CTX->proxy_address_str(proxy_address); + } else { + LOG(WARNING, + "Proxy pid file '{}' second line is empty. Will NOT use proxy", + pid_path); + return; + } + } else { + LOG(WARNING, + "Proxy pid file '{}' was found but cannot be opened. 
Will NOT use proxy.", + pid_path); + return; + } + LOG(INFO, "Proxy is enabled and will be used!"); + CTX->use_proxy(true); +} + +/** + * Lookup proxy address via hermes RPC client + * @throws runtime_error + */ +void +lookup_proxy_addr() { + auto addr = lookup_endpoint(CTX->proxy_address_str(), true); + LOG(DEBUG, "Found proxy peer: {}", addr.to_string()); + CTX->proxy_host(addr); +} + } // namespace gkfs::utils diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 159280c74..27c78d2b6 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -64,6 +64,12 @@ pair forward_write(const string& path, const void* buf, const off64_t offset, const size_t write_size, const int8_t num_copies) { + if(gkfs::config::proxy::fwd_io && CTX->use_proxy()) { + LOG(WARNING, + "{} was called even though proxy should be used! Note: io threshold '{}' and rpc write size '{}'", + __func__, gkfs::config::proxy::fwd_io, write_size); + } + // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; @@ -297,6 +303,12 @@ forward_read(const string& path, void* buf, const off64_t offset, const size_t read_size, const int8_t num_copies, std::set& failed) { + if(gkfs::config::proxy::fwd_io && CTX->use_proxy()) { + LOG(WARNING, + "{} was called even though proxy should be used! 
Note: io threshold '{}' and rpc read size '{}'", + __func__, gkfs::config::proxy::fwd_io, read_size); + } + // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; @@ -504,6 +516,11 @@ int forward_truncate(const std::string& path, size_t current_size, size_t new_size, const int8_t num_copies) { + if(CTX->use_proxy()) { + LOG(WARNING, "{} is run due to missing proxy implementation!", + __func__); + } + // import pow2-optimized arithmetic functions using namespace gkfs::utils::arithmetic; @@ -583,6 +600,11 @@ forward_truncate(const std::string& path, size_t current_size, size_t new_size, pair forward_get_chunk_stat() { + if(gkfs::config::proxy::fwd_chunk_stat && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__); + } + std::vector> handles; auto err = 0; @@ -624,7 +646,7 @@ forward_get_chunk_stat() { try { // XXX We might need a timeout here to not wait forever for an - // output that never comes? + // output that never comes? Yep. Seems to be an issue. out = handles[i].get().at(0); if(out.err()) { diff --git a/src/client/rpc/forward_data_proxy.cpp b/src/client/rpc/forward_data_proxy.cpp new file mode 100644 index 000000000..fc25df982 --- /dev/null +++ b/src/client/rpc/forward_data_proxy.cpp @@ -0,0 +1,197 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include +#include + +#include +#include + +#include + +using namespace std; + +namespace gkfs::rpc { + +/** + * Actual chunking logic on proxy handler + * @param path + * @param buf + * @param append_flag + * @param offset + * @param write_size + * @param updated_metadentry_size + * @return + */ +pair +forward_write_proxy(const string& path, const void* buf, off64_t offset, + size_t write_size) { + LOG(DEBUG, "Using write proxy for path '{}' offset '{}' size '{}' ...", + path, offset, write_size); + // TODO mostly copy pasta from forward_data. + assert(write_size > 0); + + // Calculate chunkid boundaries and numbers so that daemons know in + // which interval to look for chunks + + // some helper variables for async RPC + std::vector bufseq{ + hermes::mutable_buffer{const_cast(buf), write_size}, + }; + + // expose user buffers so that they can serve as RDMA data sources + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = ld_proxy_service->expose( + bufseq, hermes::access_mode::read_only); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to expose buffers for RMA"); + return make_pair(EBUSY, 0); + } + + auto endp = CTX->proxy_host(); + auto err = 0; + ssize_t out_size = 0; + try { + LOG(DEBUG, "Sending RPC ..."); + + gkfs::rpc::write_data_proxy::input in(path, offset, write_size, + local_buffers); + LOG(DEBUG, "proxy-host: {}, path: '{}', size: {}, offset: {}", endp.to_string(), + path, in.write_size(), in.offset()); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = ld_proxy_service->post(endp, in) + .get() + .at(0); + + if(out.err()) { + LOG(ERROR, "Daemon reported error: {}", out.err()); + err = out.err(); + } + out_size = out.io_size(); + + } catch(const std::exception& ex) { + LOG(ERROR, "While RPC send or getting RPC output. Err: '{}'", + ex.what()); + err = EBUSY; + } + if(err) + return make_pair(err, 0); + else + return make_pair(0, out_size); +} + +pair +forward_read_proxy(const string& path, void* buf, const off64_t offset, + const size_t read_size) { + LOG(DEBUG, "Using read proxy for path '{}' offset '{}' size '{}' ...", path, + offset, read_size); + + // some helper variables for async RPCs + std::vector bufseq{ + hermes::mutable_buffer{buf, read_size}, + }; + + // expose user buffers so that they can serve as RDMA data targets + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = ld_proxy_service->expose( + bufseq, hermes::access_mode::write_only); + + } catch(const std::exception& ex) { + LOG(ERROR, "Failed to expose buffers for RMA"); + errno = EBUSY; + return make_pair(EBUSY, 0); + } + + auto endp = CTX->proxy_host(); + auto err = 0; + ssize_t out_size = 0; + + try { + LOG(DEBUG, "Sending RPC ..."); + + gkfs::rpc::read_data_proxy::input in(path, offset, read_size, + local_buffers); + LOG(DEBUG, "proxy-host: {}, path: '{}', size: {}, offset: {}", endp.to_string(), + path, in.read_size(), in.offset()); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = ld_proxy_service->post(endp, in) + .get() + .at(0); + + if(out.err()) { + LOG(ERROR, "Daemon reported error: {}", out.err()); + err = out.err(); + } + out_size = out.io_size(); + + } catch(const std::exception& ex) { + LOG(ERROR, "While RPC send or getting RPC output. Err: '{}'", + ex.what()); + err = EBUSY; + } + + if(err) + return make_pair(err, 0); + else + return make_pair(0, out_size); +} + +pair +forward_get_chunk_stat_proxy() { + auto endp = CTX->proxy_host(); + gkfs::rpc::chunk_stat_proxy::input in(0); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + + auto out = ld_proxy_service->post(endp, in) + .get() + .at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err()) + return make_pair(out.err(), ChunkStat{}); + else + return make_pair(0, ChunkStat{out.chunk_size(), out.chunk_total(), + out.chunk_free()}); + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return make_pair(EBUSY, ChunkStat{}); + } +} + +} // namespace gkfs::rpc \ No newline at end of file diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index 677e53c17..e1054740e 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -56,7 +56,10 @@ namespace gkfs::rpc { */ int forward_create(const std::string& path, const mode_t mode, const int copy) { - + if(gkfs::config::proxy::fwd_create && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__); + } auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path, copy)); @@ -88,7 +91,10 @@ 
forward_create(const std::string& path, const mode_t mode, const int copy) { */ int forward_stat(const std::string& path, string& attr, const int copy) { - + if(gkfs::config::proxy::fwd_stat && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__); + } auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path, copy)); @@ -130,6 +136,10 @@ forward_stat(const std::string& path, string& attr, const int copy) { */ int forward_remove(const std::string& path, const int8_t num_copies) { + if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__); + } int64_t size = 0; uint32_t mode = 0; @@ -283,7 +293,10 @@ forward_remove(const std::string& path, const int8_t num_copies) { */ int forward_decr_size(const std::string& path, size_t length, const int copy) { - + if(CTX->use_proxy()) { + LOG(WARNING, "{} is run due to missing proxy implementation!", + __func__); + } auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path, copy)); @@ -515,7 +528,10 @@ pair forward_update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag, const int num_copies) { - + if(gkfs::config::proxy::fwd_update_size && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__); + } std::vector> handles; for(auto copy = 0; copy < num_copies + 1; copy++) { @@ -579,7 +595,10 @@ forward_update_metadentry_size(const string& path, const size_t size, */ pair forward_get_metadentry_size(const std::string& path, const int copy) { - + if(CTX->use_proxy()) { + LOG(WARNING, "{} is run due to missing proxy implementation!", + __func__); + } auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path, copy)); @@ -615,6 +634,11 @@ forward_get_metadentry_size(const std::string& path, const int copy) { pair> forward_get_dirents(const string& path) { + 
if(CTX->use_proxy()) { + LOG(WARNING, "{} is run due to missing proxy implementation!", + __func__); + } + LOG(DEBUG, "{}() enter for path '{}'", __func__, path) auto const targets = CTX->distributor()->locate_directory_metadata(path); @@ -761,6 +785,11 @@ forward_get_dirents(const string& path) { pair>> forward_get_dirents_single(const string& path, int server) { + if(gkfs::config::proxy::fwd_get_dirents_single && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__); + } + LOG(DEBUG, "{}() enter for path '{}'", __func__, path) auto const targets = CTX->distributor()->locate_directory_metadata(path); diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp new file mode 100644 index 000000000..2271ad943 --- /dev/null +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -0,0 +1,249 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include +#include + +#include + +using namespace std; + +namespace gkfs { +namespace rpc { + +int +forward_create_proxy(const std::string& path, const mode_t mode) { + auto endp = CTX->proxy_host(); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = ld_proxy_service + ->post(endp, path, mode) + .get() + .at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + return out.err() ? out.err() : 0; + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } +} + +int +forward_stat_proxy(const std::string& path, string& attr) { + + auto endp = CTX->proxy_host(); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = ld_proxy_service->post(endp, path) + .get() + .at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err()) + return out.err(); + + attr = out.db_val(); + return 0; + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } +} + +int +forward_remove_proxy(const std::string& path) { + auto endp = CTX->proxy_host(); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = ld_proxy_service->post(endp, path) + .get() + .at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + return out.err() ? 
out.err() : 0; + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } +} + +pair +forward_update_metadentry_size_proxy(const string& path, const size_t size, + const off64_t offset, + const bool append_flag) { + + auto endp = CTX->proxy_host(); + try { + LOG(DEBUG, "Sending update size proxy RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = ld_proxy_service + ->post( + endp, path, size, offset, + bool_to_merc_bool(append_flag)) + .get() + .at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err()) + return make_pair(out.err(), 0); + else + return make_pair(0, out.ret_size()); + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return make_pair(EBUSY, 0); + } +} + +pair>> +forward_get_dirents_single_proxy(const string& path, int server) { + + LOG(DEBUG, "{}() enter for path '{}'", __func__, path) + auto endp = CTX->proxy_host(); + + /* preallocate receiving buffer. The actual size is not known yet. + * + * On C++14 make_unique function also zeroes the newly allocated buffer. + * It turns out that this operation is incredibly slow for such a big + * buffer. Moreover we don't need a zeroed buffer here. + */ + auto large_buffer = std::unique_ptr( + new char[gkfs::config::rpc::dirents_buff_size]); + + // We use the full size per server...
+ const std::size_t per_host_buff_size = gkfs::config::rpc::dirents_buff_size; + vector> output; + + // expose local buffers for RMA from servers + std::vector exposed_buffers; + exposed_buffers.reserve(1); + try { + exposed_buffers.emplace_back(ld_proxy_service->expose( + std::vector{hermes::mutable_buffer{ + large_buffer.get(), per_host_buff_size}}, + hermes::access_mode::write_only)); + } catch(const std::exception& ex) { + LOG(ERROR, "{}() Failed to expose buffers for RMA. err '{}'", __func__, + ex.what()); + return make_pair(EBUSY, output); + } + + auto err = 0; + // send RPCs + std::vector> + handles; + + gkfs::rpc::get_dirents_extended_proxy::input in(path, server, + exposed_buffers[0]); + + try { + LOG(DEBUG, "{}() Sending IPC to proxy", __func__); + handles.emplace_back( + ld_proxy_service->post( + endp, in)); + } catch(const std::exception& ex) { + LOG(ERROR, + "{}() Unable to send non-blocking proxy_get_dirents() on {} [peer: proxy] err '{}'", + __func__, path, ex.what()); + err = EBUSY; + } + + LOG(DEBUG, + "{}() path '{}' sent rpc_srv_get_dirents() rpc to proxy. Waiting on reply next and deserialize", + __func__, path); + + // wait for RPC responses + + gkfs::rpc::get_dirents_extended_proxy::output out; + + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + out = handles[0].get().at(0); + // skip processing dirent data if there was an error during send + // In this case all responses are gathered but their contents skipped + + if(out.err() != 0) { + LOG(ERROR, + "{}() Failed to retrieve dir entries from proxy. Error '{}', path '{}'", + __func__, strerror(out.err()), path); + err = out.err(); + // We need to gather all responses before exiting + } + } catch(const std::exception& ex) { + LOG(ERROR, + "{}() Failed to get rpc output.. 
[path: {}, target host: proxy] err '{}'", + __func__, path, ex.what()); + err = EBUSY; + // We need to gather all responses before exiting + } + + // The parenthesis is extremely important if not the cast will add as a + // size_t or a time_t and not as a char + auto out_buff_ptr = static_cast(exposed_buffers[0].begin()->data()); + auto bool_ptr = reinterpret_cast(out_buff_ptr); + auto size_ptr = reinterpret_cast( + (out_buff_ptr) + (out.dirents_size() * sizeof(bool))); + auto ctime_ptr = reinterpret_cast( + (out_buff_ptr) + + (out.dirents_size() * (sizeof(bool) + sizeof(size_t)))); + auto names_ptr = + out_buff_ptr + (out.dirents_size() * + (sizeof(bool) + sizeof(size_t) + sizeof(time_t))); + + for(std::size_t j = 0; j < out.dirents_size(); j++) { + + bool ftype = (*bool_ptr); + bool_ptr++; + + size_t size = *size_ptr; + size_ptr++; + + time_t ctime = *ctime_ptr; + ctime_ptr++; + + auto name = std::string(names_ptr); + // number of characters in entry + \0 terminator + names_ptr += name.size() + 1; + output.emplace_back(std::forward_as_tuple(name, ftype, size, ctime)); + } + return make_pair(err, output); +} + +} // namespace rpc +} // namespace gkfs \ No newline at end of file diff --git a/src/client/rpc/rpc_types.cpp b/src/client/rpc/rpc_types.cpp index 4b375e69b..0b5c4869d 100644 --- a/src/client/rpc/rpc_types.cpp +++ b/src/client/rpc/rpc_types.cpp @@ -34,24 +34,45 @@ // register request types so that they can be used by users and the engine // void -hermes::detail::register_user_request_types() { - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); +hermes::detail::register_user_request_types(uint32_t provider_id) { + if(provider_id == 0) { + (void) registered_requests().add(provider_id); + (void) 
registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add( + provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add( + provider_id); + (void) registered_requests().add( + provider_id); + (void) registered_requests().add( + provider_id); #ifdef HAS_SYMLINKS - (void) registered_requests().add(); + (void) registered_requests().add(provider_id); #endif // HAS_SYMLINKS - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); - (void) registered_requests().add(); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add( + provider_id); + } else { + (void) registered_requests().add( + provider_id); + (void) registered_requests().add( + provider_id); + (void) registered_requests().add( + provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests().add(provider_id); + (void) registered_requests() + .add(provider_id); + (void) registered_requests().add( + provider_id); + } } diff --git a/src/daemon/backend/data/chunk_storage.cpp b/src/daemon/backend/data/chunk_storage.cpp index c86d24f2c..467bde8fd 100644 --- a/src/daemon/backend/data/chunk_storage.cpp +++ b/src/daemon/backend/data/chunk_storage.cpp @@ -39,6 +39,7 @@ #include #include #include +#include extern "C" { #include @@ -146,10 +147,14 @@ ChunkStorage::write_chunk(const string& file_path, size_t size, off64_t offset) const { assert((offset + size) <= chunksize_); - // may throw 
ChunkStorageException on failure - init_chunk_space(file_path); - - auto chunk_path = absolute(get_chunk_path(file_path, chunk_id)); + string chunk_path{}; + if(gkfs::config::limbo_mode) { + chunk_path = "/dev/null"s; + } else { + // may throw ChunkStorageException on failure + init_chunk_space(file_path); + chunk_path = absolute(get_chunk_path(file_path, chunk_id)); + } FileHandle fh(open(chunk_path.c_str(), O_WRONLY | O_CREAT, 0640), chunk_path); @@ -195,7 +200,12 @@ ssize_t ChunkStorage::read_chunk(const string& file_path, gkfs::rpc::chnk_id_t chunk_id, char* buf, size_t size, off64_t offset) const { assert((offset + size) <= chunksize_); - auto chunk_path = absolute(get_chunk_path(file_path, chunk_id)); + string chunk_path{}; + if(gkfs::config::limbo_mode) { + chunk_path = "/dev/zero"s; + } else { + chunk_path = absolute(get_chunk_path(file_path, chunk_id)); + } FileHandle fh(open(chunk_path.c_str(), O_RDONLY), chunk_path); if(!fh.valid()) { diff --git a/src/daemon/classes/fs_data.cpp b/src/daemon/classes/fs_data.cpp index 33b43bf59..909c26fe9 100644 --- a/src/daemon/classes/fs_data.cpp +++ b/src/daemon/classes/fs_data.cpp @@ -130,6 +130,15 @@ FsData::rpc_protocol(const std::string& rpc_protocol) { rpc_protocol_ = rpc_protocol; } +const std::string& +FsData::proxy_rpc_protocol() const { + return proxy_rpc_protocol_; +} +void +FsData::proxy_rpc_protocol(const std::string& proxy_rpc_protocol) { + proxy_rpc_protocol_ = proxy_rpc_protocol; +} + const std::string& FsData::bind_addr() const { return bind_addr_; @@ -140,6 +149,16 @@ FsData::bind_addr(const std::string& addr) { bind_addr_ = addr; } +const std::string& +FsData::bind_proxy_addr() const { + return bind_proxy_addr_; +} + +void +FsData::bind_proxy_addr(const std::string& proxy_addr) { + bind_proxy_addr_ = proxy_addr; +} + const std::string& FsData::hosts_file() const { return hosts_file_; diff --git a/src/daemon/classes/rpc_data.cpp b/src/daemon/classes/rpc_data.cpp index c1e931316..cf8ba8d87 100644 --- 
a/src/daemon/classes/rpc_data.cpp +++ b/src/daemon/classes/rpc_data.cpp @@ -30,9 +30,7 @@ using namespace std; -namespace gkfs { - -namespace daemon { +namespace gkfs::daemon { // Getter/Setter @@ -46,6 +44,16 @@ RPCData::server_rpc_mid(margo_instance* server_rpc_mid) { RPCData::server_rpc_mid_ = server_rpc_mid; } +margo_instance* +RPCData::proxy_server_rpc_mid() { + return proxy_server_rpc_mid_; +} + +void +RPCData::proxy_server_rpc_mid(margo_instance* proxy_server_rpc_mid) { + RPCData::proxy_server_rpc_mid_ = proxy_server_rpc_mid; +} + ABT_pool RPCData::io_pool() const { return io_pool_; @@ -76,6 +84,16 @@ RPCData::self_addr_str(const std::string& addr_str) { self_addr_str_ = addr_str; } +const std::string& +RPCData::self_proxy_addr_str() const { + return self_proxy_addr_str_; +} + +void +RPCData::self_proxy_addr_str(const std::string& proxy_addr_str) { + self_proxy_addr_str_ = proxy_addr_str; +} + const std::shared_ptr& RPCData::distributor() const { return distributor_; @@ -87,6 +105,4 @@ RPCData::distributor( distributor_ = distributor; } - -} // namespace daemon -} // namespace gkfs +} // namespace gkfs::daemon \ No newline at end of file diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 0d2778e27..9d0180632 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -59,6 +59,7 @@ #include #include #include +#include extern "C" { #include @@ -84,6 +85,8 @@ struct cli_options { string parallax_size; string stats_file; string prometheus_gateway; + string proxy_protocol; + string proxy_listen; }; /** @@ -232,6 +235,101 @@ init_rpc_server() { register_server_rpcs(mid); } +void +register_proxy_server_rpcs(margo_instance_id mid) { + // MARGO_REGISTER(mid, gkfs::rpc::tag::write, rpc_write_data_in_t, + // rpc_data_out_t, rpc_srv_write); + // MARGO_REGISTER(mid, gkfs::rpc::tag::read, rpc_read_data_in_t, + // rpc_data_out_t, rpc_srv_read); + MARGO_REGISTER(mid, gkfs::rpc::tag::get_chunk_stat, rpc_chunk_stat_in_t, + rpc_chunk_stat_out_t, 
rpc_srv_get_chunk_stat); + MARGO_REGISTER(mid, gkfs::rpc::tag::create, rpc_mk_node_in_t, rpc_err_out_t, + rpc_srv_create); + MARGO_REGISTER(mid, gkfs::rpc::tag::stat, rpc_path_only_in_t, + rpc_stat_out_t, rpc_srv_stat); + MARGO_REGISTER(mid, gkfs::rpc::tag::remove_metadata, rpc_rm_node_in_t, + rpc_rm_metadata_out_t, rpc_srv_remove_metadata); + MARGO_REGISTER(mid, gkfs::rpc::tag::remove_data, rpc_rm_node_in_t, + rpc_err_out_t, rpc_srv_remove_data); + MARGO_REGISTER(mid, gkfs::rpc::tag::update_metadentry_size, + rpc_update_metadentry_size_in_t, + rpc_update_metadentry_size_out_t, + rpc_srv_update_metadentry_size); + MARGO_REGISTER(mid, gkfs::rpc::tag::get_dirents_extended, + rpc_get_dirents_in_t, rpc_get_dirents_out_t, + rpc_srv_get_dirents_extended); + // proxy daemon specific RPCs + MARGO_REGISTER(mid, gkfs::rpc::tag::proxy_daemon_write, + rpc_proxy_daemon_write_in_t, rpc_data_out_t, + rpc_srv_proxy_write); + MARGO_REGISTER(mid, gkfs::rpc::tag::proxy_daemon_read, + rpc_proxy_daemon_read_in_t, rpc_data_out_t, + rpc_srv_proxy_read); +} + + +void +init_proxy_rpc_server() { + // TODO currently copy-paste. redundant function. fix. + hg_addr_t addr_self; + hg_size_t addr_self_cstring_sz = 128; + char addr_self_cstring[128]; + struct hg_init_info hg_options = HG_INIT_INFO_INITIALIZER; + hg_options.auto_sm = GKFS_DATA->use_auto_sm() ? 
HG_TRUE : HG_FALSE; + hg_options.stats = HG_FALSE; + if(gkfs::rpc::protocol::ofi_psm2 == GKFS_DATA->proxy_rpc_protocol()) + hg_options.na_init_info.progress_mode = NA_NO_BLOCK; + // Start Margo (this will also initialize Argobots and Mercury internally) + auto margo_config = fmt::format( + R"({{ "use_progress_thread" : true, "rpc_thread_count" : {} }})", + gkfs::config::rpc::proxy_handler_xstreams); + struct margo_init_info args = {nullptr}; + args.json_config = margo_config.c_str(); + args.hg_init_info = &hg_options; + auto* mid = margo_init_ext(GKFS_DATA->bind_proxy_addr().c_str(), + MARGO_SERVER_MODE, &args); + // hg_options.na_class = nullptr; + // if(gkfs::rpc::protocol::ofi_psm2 == GKFS_DATA->proxy_rpc_protocol()) + // hg_options.na_init_info.progress_mode = NA_NO_BLOCK; + // // Start Margo (this will also initialize Argobots and Mercury + // internally) auto mid = + // margo_init_opt(GKFS_DATA->bind_proxy_addr().c_str(), + // MARGO_SERVER_MODE, &hg_options, HG_TRUE, + // gkfs::config::rpc::proxy_handler_xstreams); + if(mid == MARGO_INSTANCE_NULL) { + throw runtime_error("Failed to initialize the Margo proxy RPC server"); + } + // Figure out what address this server is listening on (must be freed when + // finished) + auto hret = margo_addr_self(mid, &addr_self); + if(hret != HG_SUCCESS) { + margo_finalize(mid); + throw runtime_error("Failed to retrieve proxy server RPC address"); + } + // Convert the address to a cstring (with \0 terminator). 
+ hret = margo_addr_to_string(mid, addr_self_cstring, &addr_self_cstring_sz, + addr_self); + if(hret != HG_SUCCESS) { + margo_addr_free(mid, addr_self); + margo_finalize(mid); + throw runtime_error( + "Failed to convert proxy server RPC address to string"); + } + margo_addr_free(mid, addr_self); + + std::string addr_self_str(addr_self_cstring); + RPC_DATA->self_proxy_addr_str(addr_self_str); + + GKFS_DATA->spdlogger()->info("{}() Accepting proxy RPCs on address {}", + __func__, addr_self_cstring); + + // Put context and class into RPC_data object + RPC_DATA->proxy_server_rpc_mid(mid); + + // register RPCs + register_proxy_server_rpcs(mid); +} + /** * @brief Initializes the daemon environment and setting up its subroutines. * @internal @@ -308,6 +406,35 @@ init_environment() { throw; } + // init margo for proxy RPC + + if(!GKFS_DATA->bind_proxy_addr().empty()) { + GKFS_DATA->spdlogger()->debug("{}() Initializing Distributor ... ", __func__); + try { + auto distributor = std::make_shared(); + RPC_DATA->distributor(distributor); + } catch(const std::exception& e) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to initialize Distributor: {}", __func__, + e.what()); + throw; + } + GKFS_DATA->spdlogger()->debug("{}() Distributed running.", __func__); + + GKFS_DATA->spdlogger()->debug( + "{}() Initializing proxy RPC server: '{}'", __func__, + GKFS_DATA->bind_proxy_addr()); + try { + init_proxy_rpc_server(); + } catch(const std::exception& e) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to initialize proxy RPC server: {}", __func__, + e.what()); + throw; + } + GKFS_DATA->spdlogger()->debug("{}() Proxy RPC server running.", __func__); + } + // Init Argobots ESs to drive IO try { GKFS_DATA->spdlogger()->debug("{}() Initializing I/O pool", __func__); @@ -517,6 +644,49 @@ parse_input(const cli_options& opts, const CLI::App& desc) { GKFS_DATA->rpc_protocol(rpc_protocol); GKFS_DATA->bind_addr(fmt::format("{}://{}", rpc_protocol, addr)); + // proxy-daemon interface which 
is optional (mostly copy-paste from above + // for now TODO) + string proxy_addr{}; + string proxy_protocol{}; + if(desc.count("--proxy-protocol")) { + proxy_protocol = opts.proxy_protocol; + auto protocol_found = false; + for(const auto& valid_protocol : + gkfs::rpc::protocol::all_remote_protocols) { + if(proxy_protocol == valid_protocol) { + protocol_found = true; + break; + } + } + if(!protocol_found) + throw runtime_error(fmt::format( + "Given RPC protocol '{}' not supported for proxy. Check --help for supported protocols.", + rpc_protocol)); + if(desc.count("--proxy-listen")) { + proxy_addr = opts.proxy_listen; + // ofi+verbs requires an empty proxy_addr to bind to the ib + // interface + if(proxy_protocol == string(gkfs::rpc::protocol::ofi_verbs)) { + /* + * FI_VERBS_IFACE : The prefix or the full name of the network + * interface associated with the verbs device (default: ib) + * Mercury does not allow to bind to an address when ofi+verbs + * is used + */ + if(!secure_getenv("FI_VERBS_IFACE")) + setenv("FI_VERBS_IFACE", proxy_addr.c_str(), 1); + proxy_addr = ""s; + } + } else { + if(proxy_protocol != string(gkfs::rpc::protocol::ofi_verbs)) + proxy_addr = gkfs::rpc::get_my_hostname(true); + } + GKFS_DATA->proxy_rpc_protocol(proxy_protocol); + GKFS_DATA->bind_proxy_addr( + fmt::format("{}://{}", proxy_protocol, proxy_addr)); + } + + string hosts_file; if(desc.count("--hosts-file")) { hosts_file = opts.hosts_file; @@ -527,11 +697,22 @@ parse_input(const cli_options& opts, const CLI::App& desc) { GKFS_DATA->hosts_file(hosts_file); assert(desc.count("--mountdir")); - auto mountdir = opts.mountdir; - // Create mountdir. 
We use this dir to get some information on the - // underlying fs with statfs in gkfs_statfs - fs::create_directories(mountdir); - GKFS_DATA->mountdir(fs::canonical(mountdir).native()); + // Store mountdir and ensure parent dir exists as it is required for path + // resolution on the client + try { + fs::path mountdir(opts.mountdir); + auto mountdir_parent = fs::canonical(mountdir.parent_path()); + GKFS_DATA->mountdir(fmt::format("{}/{}", mountdir_parent.native(), + mountdir.filename().native())); + GKFS_DATA->spdlogger()->info("{}() Mountdir '{}'", __func__, + GKFS_DATA->mountdir()); + } catch(const std::exception& e) { + auto emsg = fmt::format( + "Parent directory for given mountdir does not exist. err '{}' Exiting ...", + e.what()); + cerr << emsg << endl; + exit(EXIT_FAILURE); + } assert(desc.count("--rootdir")); auto rootdir = opts.rootdir; @@ -790,6 +971,12 @@ main(int argc, const char* argv[]) { "--prometheus-gateway", opts.prometheus_gateway, "Defines the prometheus gateway (Default 127.0.0.1:9091)."); #endif + desc.add_option( + "--proxy-protocol,-p", opts.proxy_protocol, + "Starts an additional RPC server for proxy communication. Choose between: ofi+sockets, ofi+psm2, ofi+verbs. Default: Disabled"); + desc.add_option( + "--proxy-listen,-L", opts.proxy_listen, + "Address or interface to bind the proxy rpc server on (see listen above)"); desc.add_flag("--version", "Print version and exit."); // clang-format on diff --git a/src/daemon/handler/srv_data.cpp b/src/daemon/handler/srv_data.cpp index b59f5c551..18c240b07 100644 --- a/src/daemon/handler/srv_data.cpp +++ b/src/daemon/handler/srv_data.cpp @@ -635,6 +635,576 @@ rpc_srv_read(hg_handle_t handle) { return handler_ret; } +/** + * @brief Serves a write request transferring the chunks associated with this + * daemon and store them on the node-local FS. + * @internal + * The write operation has multiple steps: + * 1. Setting up all RPC related information + * 2. 
Allocating space for bulk transfer buffers + * 3. By processing the RPC input, the chunk IDs that are hashing to this daemon + * are computed based on a client-defined interval (start and endchunk id for + * this write operation). The client does _not_ provide the daemons with a list + * of chunk IDs because it is dynamic data that cannot be part of an RPC input + * struct. Therefore, this information would need to be pulled with a bulk + * transfer as well, adding unnecessary latency to the overall write operation. + * + * For each relevant chunk, a PULL bulk transfer is issued. Once finished, a + * non-blocking Argobots tasklet is launched to write the data chunk to the + * backend storage. Therefore, bulk transfer and the backend I/O operation are + * overlapping for efficiency. + * 4. Wait for all tasklets to complete adding up all the complete written data + * size as reported by each task. + * 5. Respond to client (when all backend write operations are finished) and + * cleanup RPC resources. Any error is reported in the RPC output struct. Note, + * that backend write operations are not canceled while in-flight when a task + * encounters an error. + * + * Note, refer to the data backend documentation w.r.t. how Argobots tasklets + * work and why they are used. + * + * All exceptions must be caught here and dealt with accordingly. + * @endinternal + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury + */ +hg_return_t +rpc_srv_proxy_write(hg_handle_t handle) { + /* + * 1.
Setup + */ + rpc_proxy_daemon_write_in_t in{}; + rpc_data_out_t out{}; + hg_bulk_t bulk_handle = nullptr; + // default out for error + out.err = EIO; + out.io_size = 0; + // Getting some information from margo + auto ret = margo_get_input(handle, &in); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "{}() Could not get RPC input data with err {}", __func__, ret); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + } + auto hgi = margo_get_info(handle); + auto mid = margo_hg_handle_get_instance(handle); + auto bulk_size = margo_bulk_get_size(in.bulk_handle); + GKFS_DATA->spdlogger()->debug( + "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'", + __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, + in.total_chunk_size, bulk_size, in.offset); + + +#ifdef GKFS_ENABLE_AGIOS + int* data; + ABT_eventual eventual = ABT_EVENTUAL_NULL; + + /* creating eventual */ + ABT_eventual_create(sizeof(int64_t), &eventual); + + unsigned long long int request_id = generate_unique_id(); + char* agios_path = (char*) in.path; + + // We should call AGIOS before chunking (as that is an internal way to + // handle the requests) + if(!agios_add_request(agios_path, AGIOS_WRITE, in.offset, + in.total_chunk_size, request_id, + AGIOS_SERVER_ID_IGNORE, agios_eventual_callback, + eventual)) { + GKFS_DATA->spdlogger()->error("{}() Failed to send request to AGIOS", + __func__); + } else { + GKFS_DATA->spdlogger()->debug("{}() request {} was sent to AGIOS", + __func__, request_id); + } + + /* Block until the eventual is signaled */ + ABT_eventual_wait(eventual, (void**) &data); + + unsigned long long int result = *data; + GKFS_DATA->spdlogger()->debug( + "{}() request {} was unblocked (offset = {})!", __func__, result, + in.offset); + + ABT_eventual_free(&eventual); + + // Let AGIOS knows it can release the request, as it is completed + if(!agios_release_request(agios_path, AGIOS_WRITE, 
in.total_chunk_size, + in.offset)) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to release request from AGIOS", __func__); + } +#endif + + /* + * 2. Set up buffers for pull bulk transfers + */ + void* bulk_buf; // buffer for bulk transfer + vector bulk_buf_ptrs(in.chunk_n); // buffer-chunk offsets + // create bulk handle and allocated memory for buffer with buf_sizes + // information + ret = margo_bulk_create(mid, 1, nullptr, &in.total_chunk_size, + HG_BULK_READWRITE, &bulk_handle); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error("{}() Failed to create bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, + static_cast(nullptr)); + } + // access the internally allocated memory buffer and put it into buf_ptrs + uint32_t actual_count; + ret = margo_bulk_access(bulk_handle, 0, in.total_chunk_size, + HG_BULK_READWRITE, 1, &bulk_buf, + &in.total_chunk_size, &actual_count); + if(ret != HG_SUCCESS || actual_count != 1) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to access allocated buffer from bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + } + auto const host_id = in.host_id; + [[maybe_unused]] auto const host_size = in.host_size; + + auto path = make_shared(in.path); + // chnk_ids used by this host + vector chnk_ids_host(in.chunk_n); + // counter to track how many chunks have been assigned + auto chnk_id_curr = static_cast(0); + // chnk sizes per chunk for this host + vector chnk_sizes(in.chunk_n); + // how much size is left to assign chunks for writing + auto chnk_size_left_host = in.total_chunk_size; + // temporary traveling pointer + auto chnk_ptr = static_cast(bulk_buf); + /* + * consider the following cases: + * 1. Very first chunk has offset or not and is serviced by this node + * 2. If offset, will still be only 1 chunk written (small IO): (offset + + * bulk_size <= CHUNKSIZE) ? bulk_size + * 3. 
If no offset, will only be 1 chunk written (small IO): (bulk_size <= + * CHUNKSIZE) ? bulk_size + * 4. Chunks between start and end chunk have size of the CHUNKSIZE + * 5. Last chunk (if multiple chunks are written): Don't write CHUNKSIZE but + * chnk_size_left for this destination Last chunk can also happen if only + * one chunk is written. This is covered by 2 and 3. + */ + // temporary variables + auto transfer_size = (bulk_size <= gkfs::config::rpc::chunksize) + ? bulk_size + : gkfs::config::rpc::chunksize; + uint64_t origin_offset; + uint64_t local_offset; + // object for asynchronous disk IO + gkfs::data::ChunkWriteOperation chunk_op{in.path, in.chunk_n}; + + /* + * 3. Calculate chunk sizes that correspond to this host, transfer data, and + * start tasks to write to disk + */ + // Start to look for a chunk that hashes to this host with the first chunk + // in the buffer + for(auto chnk_id_file = in.chunk_start; + chnk_id_file <= in.chunk_end && chnk_id_curr < in.chunk_n; + chnk_id_file++) { + // Continue if chunk does not hash to this host +#ifndef GKFS_ENABLE_FORWARDING + if(RPC_DATA->distributor()->locate_data(in.path, chnk_id_file, + host_size, 0) != host_id) { + GKFS_DATA->spdlogger()->trace( + "{}() chunkid '{}' ignored as it does not match to this host with id '{}'. chnk_id_curr '{}'", + __func__, chnk_id_file, host_id, chnk_id_curr); + continue; + } + + if(GKFS_DATA->enable_chunkstats()) { + GKFS_DATA->stats()->add_write(in.path, chnk_id_file); + } +#endif + + chnk_ids_host[chnk_id_curr] = + chnk_id_file; // save this id to host chunk list + // offset case. 
Only relevant in the first iteration of the loop and if + // the chunk hashes to this host + if(chnk_id_file == in.chunk_start && in.offset > 0) { + // if only 1 destination and 1 chunk (small write) the transfer_size + // == bulk_size + size_t offset_transfer_size = 0; + if(in.offset + bulk_size <= gkfs::config::rpc::chunksize) + offset_transfer_size = bulk_size; + else + offset_transfer_size = static_cast( + gkfs::config::rpc::chunksize - in.offset); + ret = margo_bulk_transfer(mid, HG_BULK_PULL, hgi->addr, + in.bulk_handle, 0, bulk_handle, 0, + offset_transfer_size); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to pull data from client for chunk {} (startchunk {}; endchunk {}", + __func__, chnk_id_file, in.chunk_start, + in.chunk_end - 1); + out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &in, &out, + &bulk_handle); + } + bulk_buf_ptrs[chnk_id_curr] = chnk_ptr; + chnk_sizes[chnk_id_curr] = offset_transfer_size; + chnk_ptr += offset_transfer_size; + chnk_size_left_host -= offset_transfer_size; + } else { + local_offset = in.total_chunk_size - chnk_size_left_host; + // origin offset of a chunk is dependent on a given offset in a + // write operation + if(in.offset > 0) + origin_offset = (gkfs::config::rpc::chunksize - in.offset) + + ((chnk_id_file - in.chunk_start) - 1) * + gkfs::config::rpc::chunksize; + else + origin_offset = (chnk_id_file - in.chunk_start) * + gkfs::config::rpc::chunksize; + // last chunk might have different transfer_size + if(chnk_id_curr == in.chunk_n - 1) + transfer_size = chnk_size_left_host; + GKFS_DATA->spdlogger()->trace( + "{}() BULK_TRANSFER_PULL hostid {} file {} chnkid {} total_Csize {} Csize_left {} origin offset {} local offset {} transfersize {}", + __func__, host_id, in.path, chnk_id_file, + in.total_chunk_size, chnk_size_left_host, origin_offset, + local_offset, transfer_size); + // RDMA the data to here + ret = margo_bulk_transfer(mid, HG_BULK_PULL, hgi->addr, + in.bulk_handle, 
origin_offset, + bulk_handle, local_offset, transfer_size); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to pull data from client. file {} chunk {} (startchunk {}; endchunk {})", + __func__, in.path, chnk_id_file, in.chunk_start, + (in.chunk_end - 1)); + out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &in, &out, + &bulk_handle); + } + bulk_buf_ptrs[chnk_id_curr] = chnk_ptr; + chnk_sizes[chnk_id_curr] = transfer_size; + chnk_ptr += transfer_size; + chnk_size_left_host -= transfer_size; + } + try { + // start tasklet for writing chunk + chunk_op.write_nonblock( + chnk_id_curr, chnk_ids_host[chnk_id_curr], + bulk_buf_ptrs[chnk_id_curr], chnk_sizes[chnk_id_curr], + (chnk_id_file == in.chunk_start) ? in.offset : 0); + } catch(const gkfs::data::ChunkWriteOpException& e) { + // This exception is caused by setup of Argobots variables. If this + // fails, something is really wrong + GKFS_DATA->spdlogger()->error("{}() while write_nonblock err '{}'", + __func__, e.what()); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + } + // next chunk + chnk_id_curr++; + } + // Sanity check that all chunks where detected in previous loop + // TODO don't proceed if that happens. + if(chnk_size_left_host != 0) + GKFS_DATA->spdlogger()->warn( + "{}() Not all chunks were detected!!! Size left {}", __func__, + chnk_size_left_host); + /* + * 4. Read task results and accumulate in out.io_size + */ + auto write_result = chunk_op.wait_for_tasks(); + out.err = write_result.first; + out.io_size = write_result.second; + + // Sanity check to see if all data has been written + if(in.total_chunk_size != out.io_size) { + GKFS_DATA->spdlogger()->warn( + "{}() total chunk size {} and out.io_size {} mismatch!", + __func__, in.total_chunk_size, out.io_size); + } + + /* + * 5. 
Respond and cleanup + */ + GKFS_DATA->spdlogger()->debug("{}() Sending output response {}", __func__, + out.err); + auto handler_ret = + gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + if(GKFS_DATA->enable_stats()) { + GKFS_DATA->stats()->add_value_size( + gkfs::utils::Stats::SizeOp::write_size, bulk_size); + } + return handler_ret; +} + +/** + * @brief Serves a read request reading the chunks associated with this + * daemon from the node-local FS and transferring them back to the client. + * @internal + * The read operation has multiple steps: + * 1. Setting up all RPC related information + * 2. Allocating space for bulk transfer buffers + * 3. By processing the RPC input, the chunk IDs that are hashing to this daemon + * are computed based on a client-defined interval (start and endchunk id for + * this read operation). The client does _not_ provide the daemons with a list + * of chunk IDs because it is dynamic data that cannot be part of an RPC input + * struct. Therefore, this information would need to be pulled with a bulk + * transfer as well, adding unnecessary latency to the overall write operation. + * + * For each relevant chunk, a non-blocking Arbobots tasklet is launched to read + * the data chunk from the backend storage to the allocated buffers. + * 4. Wait for all tasklets to finish the read operation while PUSH bulk + * transferring each chunk back to the client when a tasklet finishes. + * Therefore, bulk transfer and the backend I/O operation are overlapping for + * efficiency. The read size is added up for all tasklets. + * 5. Respond to client (when all bulk transfers are finished) and cleanup RPC + * resources. Any error is reported in the RPC output struct. Note, that backend + * read operations are not canceled while in-flight when a task encounters an + * error. + * + * Note, refer to the data backend documentation w.r.t. how Argobots tasklets + * work and why they are used. 
+ * + * All exceptions must be caught here and dealt with accordingly. + * @endinteral + * @param handle Mercury RPC handle + * @return Mercury error code to Mercury + */ +hg_return_t +rpc_srv_proxy_read(hg_handle_t handle) { + /* + * 1. Setup + */ + rpc_proxy_daemon_read_in_t in{}; + rpc_data_out_t out{}; + hg_bulk_t bulk_handle = nullptr; + // Set default out for error + out.err = EIO; + out.io_size = 0; + // Getting some information from margo + auto ret = margo_get_input(handle, &in); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "{}() Could not get RPC input data with err {}", __func__, ret); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + } + auto hgi = margo_get_info(handle); + auto mid = margo_hg_handle_get_instance(handle); + auto bulk_size = margo_bulk_get_size(in.bulk_handle); + + GKFS_DATA->spdlogger()->debug( + "{}() path: '{}' chunk_start '{}' chunk_end '{}' chunk_n '{}' total_chunk_size '{}' bulk_size: '{}' offset: '{}'", + __func__, in.path, in.chunk_start, in.chunk_end, in.chunk_n, + in.total_chunk_size, bulk_size, in.offset); + +#ifdef GKFS_ENABLE_AGIOS + int* data; + ABT_eventual eventual = ABT_EVENTUAL_NULL; + + /* creating eventual */ + ABT_eventual_create(sizeof(int64_t), &eventual); + + unsigned long long int request_id = generate_unique_id(); + char* agios_path = (char*) in.path; + + // We should call AGIOS before chunking (as that is an internal way to + // handle the requests) + if(!agios_add_request(agios_path, AGIOS_READ, in.offset, + in.total_chunk_size, request_id, + AGIOS_SERVER_ID_IGNORE, agios_eventual_callback, + eventual)) { + GKFS_DATA->spdlogger()->error("{}() Failed to send request to AGIOS", + __func__); + } else { + GKFS_DATA->spdlogger()->debug("{}() request {} was sent to AGIOS", + __func__, request_id); + } + + /* block until the eventual is signaled */ + ABT_eventual_wait(eventual, (void**) &data); + + unsigned long long int result = *data; + GKFS_DATA->spdlogger()->debug( + "{}() 
request {} was unblocked (offset = {})!", __func__, result, + in.offset); + + ABT_eventual_free(&eventual); + + // let AGIOS knows it can release the request, as it is completed + if(!agios_release_request(agios_path, AGIOS_READ, in.total_chunk_size, + in.offset)) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to release request from AGIOS", __func__); + } +#endif + + /* + * 2. Set up buffers for push bulk transfers + */ + void* bulk_buf; // buffer for bulk transfer + vector bulk_buf_ptrs(in.chunk_n); // buffer-chunk offsets + // create bulk handle and allocated memory for buffer with buf_sizes + // information + ret = margo_bulk_create(mid, 1, nullptr, &in.total_chunk_size, + HG_BULK_READWRITE, &bulk_handle); + if(ret != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error("{}() Failed to create bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, + static_cast(nullptr)); + } + // access the internally allocated memory buffer and put it into buf_ptrs + uint32_t actual_count; + ret = margo_bulk_access(bulk_handle, 0, in.total_chunk_size, + HG_BULK_READWRITE, 1, &bulk_buf, + &in.total_chunk_size, &actual_count); + if(ret != HG_SUCCESS || actual_count != 1) { + GKFS_DATA->spdlogger()->error( + "{}() Failed to access allocated buffer from bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + } +#ifndef GKFS_ENABLE_FORWARDING + auto const host_id = in.host_id; + auto const host_size = in.host_size; +#endif + auto path = make_shared(in.path); + // chnk_ids used by this host + vector chnk_ids_host(in.chunk_n); + // counter to track how many chunks have been assigned + auto chnk_id_curr = static_cast(0); + // chnk sizes per chunk for this host + vector chnk_sizes(in.chunk_n); + // local and origin offsets for bulk operations + vector local_offsets(in.chunk_n); + vector origin_offsets(in.chunk_n); + // how much size is left to assign chunks for reading + auto chnk_size_left_host = in.total_chunk_size; 
+ // temporary traveling pointer + auto chnk_ptr = static_cast(bulk_buf); + // temporary variables + auto transfer_size = (bulk_size <= gkfs::config::rpc::chunksize) + ? bulk_size + : gkfs::config::rpc::chunksize; + // object for asynchronous disk IO + gkfs::data::ChunkReadOperation chunk_read_op{in.path, in.chunk_n}; + /* + * 3. Calculate chunk sizes that correspond to this host and start tasks to + * read from disk + */ + // Start to look for a chunk that hashes to this host with the first chunk + // in the buffer + for(auto chnk_id_file = in.chunk_start; + chnk_id_file <= in.chunk_end && chnk_id_curr < in.chunk_n; + chnk_id_file++) { + // Continue if chunk does not hash to this host +#ifndef GKFS_ENABLE_FORWARDING + if(RPC_DATA->distributor()->locate_data(in.path, chnk_id_file, + host_size, 0) != host_id) { + GKFS_DATA->spdlogger()->trace( + "{}() chunkid '{}' ignored as it does not match to this host with id '{}'. chnk_id_curr '{}'", + __func__, chnk_id_file, host_id, chnk_id_curr); + continue; + } + if(GKFS_DATA->enable_chunkstats()) { + GKFS_DATA->stats()->add_read(in.path, chnk_id_file); + } +#endif + + chnk_ids_host[chnk_id_curr] = + chnk_id_file; // save this id to host chunk list + // Only relevant in the first iteration of the loop and if the chunk + // hashes to this host + if(chnk_id_file == in.chunk_start && in.offset > 0) { + // if only 1 destination and 1 chunk (small read) the transfer_size + // == bulk_size + size_t offset_transfer_size = 0; + if(in.offset + bulk_size <= gkfs::config::rpc::chunksize) + offset_transfer_size = bulk_size; + else + offset_transfer_size = static_cast( + gkfs::config::rpc::chunksize - in.offset); + // Setting later transfer offsets + local_offsets[chnk_id_curr] = 0; + origin_offsets[chnk_id_curr] = 0; + bulk_buf_ptrs[chnk_id_curr] = chnk_ptr; + chnk_sizes[chnk_id_curr] = offset_transfer_size; + // utils variables + chnk_ptr += offset_transfer_size; + chnk_size_left_host -= offset_transfer_size; + } else { + 
local_offsets[chnk_id_curr] = + in.total_chunk_size - chnk_size_left_host; + // origin offset of a chunk is dependent on a given offset in a + // write operation + if(in.offset > 0) + origin_offsets[chnk_id_curr] = + (gkfs::config::rpc::chunksize - in.offset) + + ((chnk_id_file - in.chunk_start) - 1) * + gkfs::config::rpc::chunksize; + else + origin_offsets[chnk_id_curr] = (chnk_id_file - in.chunk_start) * + gkfs::config::rpc::chunksize; + // last chunk might have different transfer_size + if(chnk_id_curr == in.chunk_n - 1) + transfer_size = chnk_size_left_host; + bulk_buf_ptrs[chnk_id_curr] = chnk_ptr; + chnk_sizes[chnk_id_curr] = transfer_size; + // utils variables + chnk_ptr += transfer_size; + chnk_size_left_host -= transfer_size; + } + try { + // start tasklet for read operation + chunk_read_op.read_nonblock( + chnk_id_curr, chnk_ids_host[chnk_id_curr], + bulk_buf_ptrs[chnk_id_curr], chnk_sizes[chnk_id_curr], + (chnk_id_file == in.chunk_start) ? in.offset : 0); + } catch(const gkfs::data::ChunkReadOpException& e) { + // This exception is caused by setup of Argobots variables. If this + // fails, something is really wrong + GKFS_DATA->spdlogger()->error("{}() while read_nonblock err '{}'", + __func__, e.what()); + return gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + } + chnk_id_curr++; + } + // Sanity check that all chunks where detected in previous loop + // TODO error out. If we continue this will crash the server when sending + // results back that don't exist. + if(chnk_size_left_host != 0) + GKFS_DATA->spdlogger()->warn( + "{}() Not all chunks were detected!!! Size left {}", __func__, + chnk_size_left_host); + /* + * 4. 
Read task results and accumulate in out.io_size + */ + gkfs::data::ChunkReadOperation::bulk_args bulk_args{}; + bulk_args.mid = mid; + bulk_args.origin_addr = hgi->addr; + bulk_args.origin_bulk_handle = in.bulk_handle; + bulk_args.origin_offsets = &origin_offsets; + bulk_args.local_bulk_handle = bulk_handle; + bulk_args.local_offsets = &local_offsets; + bulk_args.chunk_ids = &chnk_ids_host; + // wait for all tasklets and push read data back to client + auto read_result = chunk_read_op.wait_for_tasks_and_push_back(bulk_args); + out.err = read_result.first; + out.io_size = read_result.second; + + /* + * 5. Respond and cleanup + */ + GKFS_DATA->spdlogger()->debug("{}() Sending output response, err: {}", + __func__, out.err); + auto handler_ret = + gkfs::rpc::cleanup_respond(&handle, &in, &out, &bulk_handle); + if(GKFS_DATA->enable_stats()) { + GKFS_DATA->stats()->add_value_size( + gkfs::utils::Stats::SizeOp::read_size, bulk_size); + } + return handler_ret; +} /** * @brief Serves a file truncate request and remove all corresponding chunk @@ -725,6 +1295,10 @@ DEFINE_MARGO_RPC_HANDLER(rpc_srv_write) DEFINE_MARGO_RPC_HANDLER(rpc_srv_read) +DEFINE_MARGO_RPC_HANDLER(rpc_srv_proxy_write) + +DEFINE_MARGO_RPC_HANDLER(rpc_srv_proxy_read) + DEFINE_MARGO_RPC_HANDLER(rpc_srv_truncate) DEFINE_MARGO_RPC_HANDLER(rpc_srv_get_chunk_stat) diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index 1339170b9..f766a4e65 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -311,7 +311,8 @@ rpc_srv_remove_data(hg_handle_t handle) { // Remove all chunks for that file try { - GKFS_DATA->storage()->destroy_chunk_space(in.path); + if(!gkfs::config::limbo_mode) + GKFS_DATA->storage()->destroy_chunk_space(in.path); out.err = 0; } catch(const gkfs::data::ChunkStorageException& e) { GKFS_DATA->spdlogger()->error( @@ -435,8 +436,9 @@ rpc_srv_update_metadentry_size(hg_handle_t handle) { out.err = EBUSY; } - 
GKFS_DATA->spdlogger()->debug("{}() Sending output '{}'", __func__, - out.err); + GKFS_DATA->spdlogger()->debug( + "{}() Sending output err '{}' ret_offset '{}'", __func__, out.err, + out.ret_offset); auto hret = margo_respond(handle, &out); if(hret != HG_SUCCESS) { GKFS_DATA->spdlogger()->error("{}() Failed to respond", __func__); diff --git a/src/daemon/ops/data.cpp b/src/daemon/ops/data.cpp index 3837a0dae..9f969185b 100644 --- a/src/daemon/ops/data.cpp +++ b/src/daemon/ops/data.cpp @@ -382,6 +382,7 @@ ChunkReadOperation::read_nonblock(size_t idx, const uint64_t chunk_id, task_arg.size = size; task_arg.off = offset; task_arg.eventual = task_eventuals_[idx]; + task_arg.bulk_transfer_done = false; abt_err = ABT_task_create(RPC_DATA->io_pool(), read_file_abt, &task_args_[idx], &abt_tasks_[idx]); @@ -407,60 +408,141 @@ ChunkReadOperation::wait_for_tasks_and_push_back(const bulk_args& args) { * longer be executed as the data would be corrupted The loop continues * until all eventuals have been cleaned and freed. */ - for(uint64_t idx = 0; idx < task_args_.size(); idx++) { - ssize_t* task_size = nullptr; - auto abt_err = - ABT_eventual_wait(task_eventuals_[idx], (void**) &task_size); - if(abt_err != ABT_SUCCESS) { - GKFS_DATA->spdlogger()->error( - "ChunkReadOperation::{}() Error when waiting on ABT eventual", - __func__); - io_err = EIO; - ABT_eventual_free(&task_eventuals_[idx]); - continue; - } - // error occured. stop processing but clean up - if(io_err != 0) { - ABT_eventual_free(&task_eventuals_[idx]); - continue; - } - assert(task_size != nullptr); - if(*task_size < 0) { - // sparse regions do not have chunk files and are therefore skipped - if(-(*task_size) == ENOENT) { + // TODO refactor both if/else. They have redundant code. 
+ if(gkfs::config::io::spin_lock_read) { + uint64_t bulk_transfer_cnt = 0; + do { + for(uint64_t idx = 0; idx < task_args_.size(); idx++) { + if(task_args_[idx].bulk_transfer_done) + continue; + ssize_t* task_size = nullptr; + int is_ready = 0; + auto abt_err = ABT_eventual_test( + task_eventuals_[idx], (void**) &task_size, &is_ready); + if(abt_err != ABT_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "ChunkReadOperation::{}() Error when testing on ABT eventual", + __func__); + io_err = EIO; + bulk_transfer_cnt = task_args_.size(); + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } + // not ready yet, try next + if(is_ready == ABT_FALSE) + continue; + // error occured. stop processing but clean up + if(io_err != 0) { + task_args_[idx].bulk_transfer_done = true; + bulk_transfer_cnt++; + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } + assert(task_size != nullptr); + if(*task_size < 0) { + // sparse regions do not have chunk files and are therefore + // skipped + if(-(*task_size) == ENOENT) { + task_args_[idx].bulk_transfer_done = true; + bulk_transfer_cnt++; + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } + io_err = -(*task_size); // make error code > 0 + } else if(*task_size == 0) { + // read size of 0 is not an error and can happen because + // reading the end-of-file + task_args_[idx].bulk_transfer_done = true; + bulk_transfer_cnt++; + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } else { + // successful case, push read data back to client + GKFS_DATA->spdlogger()->trace( + "ChunkReadOperation::{}() BULK_TRANSFER_PUSH file '{}' chnkid '{}' origin offset '{}' local offset '{}' transfersize '{}'", + __func__, path_, args.chunk_ids->at(idx), + args.origin_offsets->at(idx), + args.local_offsets->at(idx), *task_size); + assert(task_args_[idx].chnk_id == args.chunk_ids->at(idx)); + auto margo_err = margo_bulk_transfer( + args.mid, HG_BULK_PUSH, args.origin_addr, + args.origin_bulk_handle, + args.origin_offsets->at(idx), + 
args.local_bulk_handle, args.local_offsets->at(idx), + *task_size); + if(margo_err != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "ChunkReadOperation::{}() Failed to margo_bulk_transfer with margo err: '{}'", + __func__, margo_err); + io_err = EBUSY; + continue; + } + total_read += *task_size; + } + task_args_[idx].bulk_transfer_done = true; + bulk_transfer_cnt++; ABT_eventual_free(&task_eventuals_[idx]); - continue; } - io_err = -(*task_size); // make error code > 0 - } else if(*task_size == 0) { - // read size of 0 is not an error and can happen because reading the - // end-of-file - ABT_eventual_free(&task_eventuals_[idx]); - continue; - } else { - // successful case, push read data back to client - GKFS_DATA->spdlogger()->trace( - "ChunkReadOperation::{}() BULK_TRANSFER_PUSH file '{}' chnkid '{}' origin offset '{}' local offset '{}' transfersize '{}'", - __func__, path_, args.chunk_ids->at(idx), - args.origin_offsets->at(idx), args.local_offsets->at(idx), - *task_size); - assert(task_args_[idx].chnk_id == args.chunk_ids->at(idx)); - auto margo_err = margo_bulk_transfer( - args.mid, HG_BULK_PUSH, args.origin_addr, - args.origin_bulk_handle, args.origin_offsets->at(idx), - args.local_bulk_handle, args.local_offsets->at(idx), - *task_size); - if(margo_err != HG_SUCCESS) { + } while(bulk_transfer_cnt != task_args_.size()); + } else { + for(uint64_t idx = 0; idx < task_args_.size(); idx++) { + ssize_t* task_size = nullptr; + auto abt_err = ABT_eventual_wait(task_eventuals_[idx], + (void**) &task_size); + if(abt_err != ABT_SUCCESS) { GKFS_DATA->spdlogger()->error( - "ChunkReadOperation::{}() Failed to margo_bulk_transfer with margo err: '{}'", - __func__, margo_err); - io_err = EBUSY; + "ChunkReadOperation::{}() Error when waiting on ABT eventual", + __func__); + io_err = EIO; + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } + // error occured. 
stop processing but clean up + if(io_err != 0) { + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } + assert(task_size != nullptr); + if(*task_size < 0) { + // sparse regions do not have chunk files and are therefore + // skipped + if(-(*task_size) == ENOENT) { + ABT_eventual_free(&task_eventuals_[idx]); + continue; + } + io_err = -(*task_size); // make error code > 0 + } else if(*task_size == 0) { + // read size of 0 is not an error and can happen because reading + // the end-of-file + ABT_eventual_free(&task_eventuals_[idx]); continue; + } else { + // successful case, push read data back to client + GKFS_DATA->spdlogger()->trace( + "ChunkReadOperation::{}() BULK_TRANSFER_PUSH file '{}' chnkid '{}' origin offset '{}' local offset '{}' transfersize '{}'", + __func__, path_, args.chunk_ids->at(idx), + args.origin_offsets->at(idx), + args.local_offsets->at(idx), *task_size); + assert(task_args_[idx].chnk_id == args.chunk_ids->at(idx)); + auto margo_err = margo_bulk_transfer( + args.mid, HG_BULK_PUSH, args.origin_addr, + args.origin_bulk_handle, args.origin_offsets->at(idx), + args.local_bulk_handle, args.local_offsets->at(idx), + *task_size); + if(margo_err != HG_SUCCESS) { + GKFS_DATA->spdlogger()->error( + "ChunkReadOperation::{}() Failed to margo_bulk_transfer with margo err: '{}'", + __func__, margo_err); + io_err = EBUSY; + continue; + } + total_read += *task_size; } - total_read += *task_size; + ABT_eventual_free(&task_eventuals_[idx]); } - ABT_eventual_free(&task_eventuals_[idx]); } + + // in case of error set read size to zero as data would be corrupted if(io_err != 0) total_read = 0; diff --git a/src/daemon/util.cpp b/src/daemon/util.cpp index bbdd6ac3a..7dc39cd8e 100644 --- a/src/daemon/util.cpp +++ b/src/daemon/util.cpp @@ -50,6 +50,8 @@ namespace gkfs::utils { void populate_hosts_file() { const auto& hosts_file = GKFS_DATA->hosts_file(); + const auto& daemon_addr = RPC_DATA->self_addr_str(); + const auto& proxy_addr = 
RPC_DATA->self_proxy_addr_str(); GKFS_DATA->spdlogger()->debug("{}() Populating hosts file: '{}'", __func__, hosts_file); ofstream lfstream(hosts_file, ios::out | ios::app); @@ -63,8 +65,11 @@ populate_hosts_file() { ? gkfs::rpc::get_my_hostname(true) : fmt::format("{}#{}", gkfs::rpc::get_my_hostname(true), GKFS_DATA->rootdir_suffix()); - lfstream << fmt::format("{} {}", hostname, RPC_DATA->self_addr_str()) - << std::endl; + auto line_out = fmt::format("{} {}", hostname, daemon_addr); + if(!proxy_addr.empty()) + line_out = fmt::format("{} {}", line_out, proxy_addr); + lfstream << line_out << std::endl; + if(!lfstream) { throw runtime_error( fmt::format("Failed to write on hosts file '{}': {}", diff --git a/src/proxy/CMakeLists.txt b/src/proxy/CMakeLists.txt new file mode 100644 index 000000000..774a4406a --- /dev/null +++ b/src/proxy/CMakeLists.txt @@ -0,0 +1,79 @@ +################################################################################ +# Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany # +# # +# This software was partially supported by the # +# EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # +# # +# This software was partially supported by the # +# ADA-FS project under the SPPEXA project funded by the DFG. # +# # +# This file is part of GekkoFS. # +# # +# GekkoFS is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# GekkoFS is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
# +# # +# You should have received a copy of the GNU General Public License # +# along with GekkoFS. If not, see . # +# # +# SPDX-License-Identifier: GPL-3.0-or-later # +################################################################################ + +# ############################################################################## +# This builds the `gkfs_proxy` executable: the main GekkoFS client that forwards requests to daemons. +# ############################################################################## +add_executable(gkfs_proxy) + +target_sources(gkfs_proxy + PRIVATE + env.cpp + proxy.cpp + proxy_data.cpp + util.cpp + rpc/srv_data.cpp + rpc/srv_metadata.cpp + rpc/forward_data.cpp + rpc/forward_metadata.cpp + ../common/rpc/rpc_util.cpp + PUBLIC ${CMAKE_SOURCE_DIR}/include/config.hpp + ${CMAKE_SOURCE_DIR}/include/version.hpp.in +) +target_link_libraries( + gkfs_proxy + PUBLIC # internal libs + distributor + log_util + env_util + # external libs + CLI11::CLI11 + fmt::fmt + Mercury::Mercury + Argobots::Argobots + Margo::Margo + # others + Threads::Threads +) + +#set(PROXY_HEADERS +# ../../include/config.hpp +# ../../include/common/common_defs.hpp +# ../../include/common/rpc/rpc_types.hpp +# ../../include/common/rpc/rpc_util.hpp +# ../../include/proxy/env.hpp +# ../../include/proxy/proxy.hpp +# ../../include/proxy/proxy_data.hpp +# ../../include/proxy/util.hpp +# ../../include/proxy/rpc/forward_data.hpp +# ../../include/proxy/rpc/forward_metadata.hpp +# ../../include/proxy/rpc/rpc_defs.hpp +# ../../include/proxy/rpc/rpc_util.hpp +# ) + +install(TARGETS gkfs_proxy RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/proxy/env.cpp b/src/proxy/env.cpp new file mode 100644 index 000000000..95f9bd408 --- /dev/null +++ b/src/proxy/env.cpp @@ -0,0 +1,14 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 
funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp new file mode 100644 index 000000000..f59dc955f --- /dev/null +++ b/src/proxy/proxy.cpp @@ -0,0 +1,388 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +using namespace std; + +static condition_variable shutdown_please; +static mutex mtx; + +struct cli_options { + string hosts_file; + string proxy_protocol; + string pid_path; +}; + +void +register_server_ipcs(margo_instance_id mid) { + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_write, + rpc_client_proxy_write_in_t, rpc_data_out_t, + proxy_rpc_srv_write) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_read, + rpc_client_proxy_read_in_t, rpc_data_out_t, + proxy_rpc_srv_read) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_chunk_stat, + rpc_chunk_stat_in_t, rpc_chunk_stat_out_t, + proxy_rpc_srv_chunk_stat) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_create, rpc_mk_node_in_t, + rpc_err_out_t, proxy_rpc_srv_create) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_stat, rpc_path_only_in_t, + rpc_stat_out_t, proxy_rpc_srv_stat) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_remove, rpc_rm_node_in_t, + rpc_err_out_t, proxy_rpc_srv_remove) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_update_size, + rpc_update_metadentry_size_in_t, + 
rpc_update_metadentry_size_out_t, + proxy_rpc_srv_update_metadentry_size) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_get_dirents_extended, + rpc_proxy_get_dirents_in_t, rpc_get_dirents_out_t, + proxy_rpc_srv_get_dirents_extended) +} + +void +init_ipc_server() { + hg_addr_t addr_self; + hg_size_t addr_self_cstring_sz = 128; + char addr_self_cstring[128]; + struct hg_init_info hg_options = HG_INIT_INFO_INITIALIZER; + hg_options.auto_sm = HG_FALSE; + hg_options.stats = HG_FALSE; + + // Start Margo (this will also initialize Argobots and Mercury internally) + auto margo_config = fmt::format( + R"({{ "use_progress_thread" : true, "rpc_thread_count" : {} }})", + gkfs::config::rpc::proxy_handler_xstreams); + struct margo_init_info args = {nullptr}; + args.json_config = margo_config.c_str(); + args.hg_init_info = &hg_options; + auto* mid = margo_init_ext(gkfs::rpc::protocol::na_sm, MARGO_SERVER_MODE, + &args); + + // hg_options.na_class = nullptr; + // // Start Margo (this will also initialize Argobots and Mercury + // internally) auto mid = margo_init_opt(gkfs::rpc::protocol::na_sm, + // MARGO_SERVER_MODE, + // &hg_options, HG_TRUE, + // gkfs::config::rpc::proxy_handler_xstreams); + if(mid == MARGO_INSTANCE_NULL) { + throw runtime_error("Failed to initialize the Margo IPC server"); + } + // Figure out what address this server is listening on (must be freed when + // finished) + auto hret = margo_addr_self(mid, &addr_self); + if(hret != HG_SUCCESS) { + margo_finalize(mid); + throw runtime_error("Failed to retrieve server IPC address"); + } + // Convert the address to a cstring (with \0 terminator). 
+ hret = margo_addr_to_string(mid, addr_self_cstring, &addr_self_cstring_sz, + addr_self); + if(hret != HG_SUCCESS) { + margo_addr_free(mid, addr_self); + margo_finalize(mid); + throw runtime_error("Failed to convert server IPC address to string"); + } + margo_addr_free(mid, addr_self); + + std::string addr_self_str(addr_self_cstring); + PROXY_DATA->server_self_addr(addr_self_str); + + PROXY_DATA->log()->info("{}() Accepting IPCs on address {}", __func__, + addr_self_cstring); + + // Put context and class into RPC_data object + PROXY_DATA->server_ipc_mid(mid); + + // register RPCs + register_server_ipcs(mid); +} + +void +register_client_rpcs(margo_instance_id mid) { + PROXY_DATA->rpc_client_ids().rpc_write_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::proxy_daemon_write, + rpc_proxy_daemon_write_in_t, rpc_data_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_read_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::proxy_daemon_read, + rpc_proxy_daemon_read_in_t, rpc_data_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_chunk_stat_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::get_chunk_stat, + rpc_chunk_stat_in_t, rpc_chunk_stat_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_create_id = MARGO_REGISTER( + mid, gkfs::rpc::tag::create, rpc_mk_node_in_t, rpc_err_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_stat_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::stat, rpc_path_only_in_t, + rpc_stat_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_remove_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::remove_metadata, + rpc_rm_node_in_t, rpc_rm_metadata_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_remove_data_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::remove_data, rpc_rm_node_in_t, + rpc_err_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_update_metadentry_size_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::update_metadentry_size, + rpc_update_metadentry_size_in_t, + rpc_update_metadentry_size_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_get_dirents_extended_id = + MARGO_REGISTER(mid, 
gkfs::rpc::tag::get_dirents_extended, + rpc_get_dirents_in_t, rpc_get_dirents_out_t, NULL); +} + +void +init_rpc_client(const string& protocol) { + struct hg_init_info hg_options = HG_INIT_INFO_INITIALIZER; + hg_options.auto_sm = PROXY_DATA->use_auto_sm() ? HG_TRUE : HG_FALSE; + hg_options.stats = HG_FALSE; + if(gkfs::rpc::protocol::ofi_psm2 == protocol.c_str()) + hg_options.na_init_info.progress_mode = NA_NO_BLOCK; + // Start Margo (this will also initialize Argobots and Mercury internally) + auto margo_config = fmt::format( + R"({{ "use_progress_thread" : true, "rpc_thread_count" : {} }})", + 0); + struct margo_init_info args = {nullptr}; + args.json_config = margo_config.c_str(); + args.hg_init_info = &hg_options; + auto* mid = margo_init_ext(protocol.c_str(), MARGO_CLIENT_MODE, &args); + + // hg_options.na_class = nullptr; + // if(gkfs::rpc::protocol::ofi_psm2 == protocol.c_str()) + // hg_options.na_init_info.progress_mode = NA_NO_BLOCK; + // // Start Margo (this will also initialize Argobots and Mercury + // internally) auto mid = margo_init_opt(protocol.c_str(), + // MARGO_CLIENT_MODE, &hg_options, + // HG_TRUE, 0); + if(mid == MARGO_INSTANCE_NULL) { + throw runtime_error("Failed to initialize the Margo RPC client"); + } + PROXY_DATA->log()->info( + "{}() Margo RPC client initialized with protocol '{}'", __func__, + protocol); + PROXY_DATA->log()->info("{}() auto sm is set to '{}' for RPC client.", + __func__, PROXY_DATA->use_auto_sm()); + PROXY_DATA->client_rpc_mid(mid); + register_client_rpcs(mid); +} + +void +init_environment(const string& hostfile_path, const string& rpc_protocol) { + // Check if host file exists before doing anything + if(!gkfs::util::check_for_hosts_file(hostfile_path)) + throw runtime_error(fmt::format( + "Host file '{}' does not exist. 
Exiting ...", hostfile_path)); + + // Check if another proxy is already running + PROXY_DATA->log()->info("{}() Checking for another proxy process...", + __func__); + if(gkfs::util::is_proxy_already_running()) { + throw runtime_error("Another proxy is already running. Exiting ..."); + } + PROXY_DATA->log()->info("{}() No other proxy is running. Proceeding ...", + __func__); + + vector> hosts{}; + try { + PROXY_DATA->log()->info("{}() Loading daemon hostsfile ...", __func__); + hosts = gkfs::util::read_hosts_file(hostfile_path); + } catch(const std::exception& e) { + auto err_msg = + fmt::format("Failed to load hosts addresses: {}", e.what()); + throw runtime_error(err_msg); + } + + // Init IPC server + PROXY_DATA->log()->info("{}() Initializing IPC server...", __func__); + try { + init_ipc_server(); + } catch(const std::exception& e) { + auto err_msg = + fmt::format("Failed to initialize IPC server: {}", e.what()); + throw runtime_error(err_msg); + } + + // Init RPC client + PROXY_DATA->log()->info("{}() Initializing RPC client...", __func__); + try { + init_rpc_client(rpc_protocol); + } catch(const std::exception& e) { + auto err_msg = + fmt::format("Failed to initialize RPC client: {}", e.what()); + throw runtime_error(err_msg); + } + + // Create PID file + PROXY_DATA->log()->info("{}() Creating PID file ...", __func__); + try { + gkfs::util::create_proxy_pid_file(); + } catch(const std::exception& e) { + auto err_msg = fmt::format( + "Unexpected error: '{}' when creating PID file.", e.what()); + throw runtime_error(err_msg); + } + + // Load hosts from hostfile + try { + PROXY_DATA->log()->info( + "{}() Loading daemon addresses and looking up ...", __func__); + gkfs::util::connect_to_hosts(hosts); + } catch(const std::exception& e) { + auto err_msg = + fmt::format("Failed to load hosts addresses: '{}'", e.what()); + throw runtime_error(err_msg); + } + + // Setup SimpleDistributor + PROXY_DATA->log()->info( + "{}() Setting up simple hash distributor with 
local_host_id '{}' #hosts '{}'...", + __func__, PROXY_DATA->local_host_id(), + PROXY_DATA->rpc_endpoints().size()); + // TODO this needs to be globally configured because client must have same + // distribution + auto simple_hash_dist = std::make_shared( + PROXY_DATA->local_host_id(), PROXY_DATA->rpc_endpoints().size()); + PROXY_DATA->distributor(simple_hash_dist); + + PROXY_DATA->log()->info("Startup successful. Proxy is ready."); +} + +void +destroy_enviroment() { + PROXY_DATA->log()->info("{}() Closing connections ...", __func__); + for(auto& endp : PROXY_DATA->rpc_endpoints()) { + if(margo_addr_free(PROXY_DATA->client_rpc_mid(), endp.second) != + HG_SUCCESS) { + PROXY_DATA->log()->warn( + "{}() Unable to free RPC client's address: '{}'.", __func__, + endp.first); + } + } + if(PROXY_DATA->server_ipc_mid() != nullptr) { + PROXY_DATA->log()->info("{}() Finalizing margo IPC server ...", + __func__); + margo_finalize(PROXY_DATA->server_ipc_mid()); + } + if(PROXY_DATA->client_rpc_mid() != nullptr) { + PROXY_DATA->log()->info("{}() Finalizing margo RPC client ...", + __func__); + margo_finalize(PROXY_DATA->client_rpc_mid()); + } + gkfs::util::remove_proxy_pid_file(); +} + +void +shutdown_handler(int dummy) { + PROXY_DATA->log()->info("{}() Received signal: '{}'", __func__, + strsignal(dummy)); + shutdown_please.notify_all(); +} + +void +initialize_loggers() { + std::string path = gkfs::config::log::proxy_log_path; + // Try to get log path from env variable + std::string env_path_key = PROXY_ENV_PREFIX; + env_path_key += "LOG_PATH"; + char* env_path = getenv(env_path_key.c_str()); + if(env_path != nullptr) { + path = env_path; + } + + spdlog::level::level_enum level = + gkfs::log::get_level(gkfs::config::log::proxy_log_level); + // Try to get log path from env variable + std::string env_level_key = PROXY_ENV_PREFIX; + env_level_key += "LOG_LEVEL"; + char* env_level = getenv(env_level_key.c_str()); + if(env_level != nullptr) { + level = gkfs::log::get_level(env_level); 
+ } + + auto logger_names = std::vector{ + "main", + }; + + gkfs::log::setup(logger_names, level, path); +} + +int +main(int argc, const char* argv[]) { + + CLI::App desc{"Allowed options"}; + cli_options opts{}; + // clang-format off + desc.add_option("--hosts-file,-H", opts.hosts_file, + "Path to the shared host file generated by daemons, including all daemon addresses to connect to. (default path './gkfs_hosts.txt')"); + desc.add_option("--proxy-protocol,-p", opts.proxy_protocol, + "Used protocol between proxy and daemon communication. Choose between: ofi+sockets, ofi+psm2, ofi+verbs. Default: ofi+sockets"); + desc.add_option("--pid-path,-P", opts.pid_path, + "Path to PID file where daemon registers itself for clients. Default: /tmp/gkfs_proxy.pid"); + // clang-format on + try { + desc.parse(argc, argv); + } catch(const CLI::ParseError& e) { + return desc.exit(e); + } + + initialize_loggers(); + PROXY_DATA->log(spdlog::get("main")); + + string proxy_protocol = gkfs::rpc::protocol::ofi_sockets; + if(desc.count("--proxy-protocol")) { + proxy_protocol = opts.proxy_protocol; + } + string hosts_file = gkfs::config::hostfile_path; + if(desc.count("--hosts-file")) { + hosts_file = opts.hosts_file; + } + if(desc.count("--pid-path")) { + PROXY_DATA->pid_file_path(opts.pid_path); + } + + PROXY_DATA->log()->info("{}() Initializing environment", __func__); + try { + init_environment(hosts_file, proxy_protocol); + } catch(const std::exception& e) { + auto emsg = + fmt::format("Failed to initialize environment: {}", e.what()); + PROXY_DATA->log()->error(emsg); + cerr << emsg << endl; + destroy_enviroment(); + exit(EXIT_FAILURE); + } + + signal(SIGINT, shutdown_handler); + signal(SIGTERM, shutdown_handler); + signal(SIGKILL, shutdown_handler); + + unique_lock lk(mtx); + // Wait for shutdown signal to initiate shutdown protocols + shutdown_please.wait(lk); + PROXY_DATA->log()->info("{}() Shutting down...", __func__); + destroy_enviroment(); + PROXY_DATA->log()->info("{}() 
Complete. Exiting...", __func__); + + return 0; +} diff --git a/src/proxy/proxy_data.cpp b/src/proxy/proxy_data.cpp new file mode 100644 index 000000000..5d93f5297 --- /dev/null +++ b/src/proxy/proxy_data.cpp @@ -0,0 +1,127 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include + +using namespace std; + +namespace gkfs { +namespace proxy { + +const shared_ptr& +ProxyData::log() const { + return spdlogger_; +} + +void +ProxyData::log(const shared_ptr& log) { + spdlogger_ = log; +} + + +margo_instance* +ProxyData::client_rpc_mid() { + return client_rpc_mid_; +} + +void +ProxyData::client_rpc_mid(margo_instance* client_rpc_mid) { + client_rpc_mid_ = client_rpc_mid; +} + +margo_instance* +ProxyData::server_ipc_mid() { + return server_ipc_mid_; +} + +void +ProxyData::server_ipc_mid(margo_instance* server_ipc_mid) { + server_ipc_mid_ = server_ipc_mid; +} + +const string& +ProxyData::server_self_addr() const { + return server_self_addr_; +} + +void +ProxyData::server_self_addr(const string& server_self_addr) { + server_self_addr_ = server_self_addr; +} + +bool +ProxyData::use_auto_sm() const { + return use_auto_sm_; +} +void +ProxyData::use_auto_sm(bool use_auto_sm) { + use_auto_sm_ = use_auto_sm; +} + +std::map& +ProxyData::rpc_endpoints() { + return rpc_endpoints_; +} + +void +ProxyData::rpc_endpoints(const std::map& rpc_endpoints) { + rpc_endpoints_ = rpc_endpoints; +} + +uint64_t +ProxyData::hosts_size() const { + return hosts_size_; +} +void +ProxyData::hosts_size(uint64_t hosts_size) { + hosts_size_ = hosts_size; +} + +uint64_t +ProxyData::local_host_id() const { + return local_host_id_; +} + +void 
+ProxyData::local_host_id(uint64_t local_host_id) { + local_host_id_ = local_host_id; +} + +void +ProxyData::distributor(std::shared_ptr d) { + distributor_ = d; +} + +const string& +ProxyData::pid_file_path() const { + return pid_file_path_; +} + +void +ProxyData::pid_file_path(const string& pid_file_path) { + pid_file_path_ = pid_file_path; +} + +std::shared_ptr +ProxyData::distributor() const { + return distributor_; +} + +margo_client_ids& +ProxyData::rpc_client_ids() { + return rpc_client_ids_; +} + + +} // namespace proxy +} // namespace gkfs \ No newline at end of file diff --git a/src/proxy/rpc/forward_data.cpp b/src/proxy/rpc/forward_data.cpp new file mode 100644 index 000000000..dac2e5778 --- /dev/null +++ b/src/proxy/rpc/forward_data.cpp @@ -0,0 +1,429 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include + +#include +#include +#include + +#include + +using namespace std; + +namespace gkfs::rpc { + +std::pair +forward_write(const std::string& path, void* buf, const int64_t offset, + const size_t write_size) { + // import pow2-optimized arithmetic functions + using namespace gkfs::utils::arithmetic; + // TODO mostly copy pasta from forward_data on client w.r.t. chunking logic + // (actually old margo code pre-hermes) + hg_bulk_t bulk_handle = nullptr; + // register local target buffer for bulk access + auto bulk_buf = buf; + auto size = make_shared(write_size); // XXX Why shared ptr? 
+ auto ret = margo_bulk_create(PROXY_DATA->client_rpc_mid(), 1, &bulk_buf, + size.get(), HG_BULK_READ_ONLY, &bulk_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to create rpc bulk handle", + __func__); + return ::make_pair(EBUSY, 0); + } + auto chnk_start = block_index(offset, gkfs::config::rpc::chunksize); + auto chnk_end = block_index((offset + write_size) - 1, + gkfs::config::rpc::chunksize); + + // Collect all chunk ids within count that have the same destination so + // that those are send in one rpc bulk transfer + ::map> target_chnks{}; + // contains the target ids, used to access the target_chnks map. + // First idx is chunk with potential offset + ::vector targets{}; + + // targets for the first and last chunk as they need special treatment + uint64_t chnk_start_target = 0; + uint64_t chnk_end_target = 0; + + for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { + auto target = PROXY_DATA->distributor()->locate_data(path, chnk_id, 0); + + if(target_chnks.count(target) == 0) { + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); + targets.push_back(target); + } else { + target_chnks[target].push_back(chnk_id); + } + + // set first and last chnk targets + if(chnk_id == chnk_start) { + chnk_start_target = target; + } + + if(chnk_id == chnk_end) { + chnk_end_target = target; + } + } + // some helper variables for async RPC + auto target_n = targets.size(); + ::vector rpc_handles(target_n); + ::vector rpc_waiters(target_n); + ::vector rpc_in(target_n); + // Issue non-blocking RPC requests and wait for the result later + for(uint64_t i = 0; i < target_n; i++) { + auto target = targets[i]; + auto total_chunk_size = + target_chnks[target].size() * + gkfs::config::rpc::chunksize; // total chunk_size for target + if(target == chnk_start_target) // receiver of first chunk must subtract + // the offset from first chunk + total_chunk_size -= + block_overrun(offset, gkfs::config::rpc::chunksize); + // 
receiver of last chunk must subtract + if(target == chnk_end_target && + !is_aligned(offset + write_size, gkfs::config::rpc::chunksize)) + total_chunk_size -= block_underrun(offset + write_size, + gkfs::config::rpc::chunksize); + // Fill RPC input + rpc_in[i].path = path.c_str(); + rpc_in[i].offset = block_overrun( + offset, + gkfs::config::rpc::chunksize); // first offset in targets is the + // chunk with a potential offset + rpc_in[i].host_id = target; + rpc_in[i].host_size = PROXY_DATA->rpc_endpoints().size(); + rpc_in[i].chunk_n = + target_chnks[target] + .size(); // number of chunks handled by that destination + rpc_in[i].chunk_start = chnk_start; // chunk start id of this write + rpc_in[i].chunk_end = chnk_end; // chunk end id of this write + rpc_in[i].total_chunk_size = total_chunk_size; // total size to write + rpc_in[i].bulk_handle = bulk_handle; + PROXY_DATA->log()->trace( + "{}() Sending non-blocking RPC to '{}': path '{}' offset '{}' chunk_n '{}' chunk_start '{}' chunk_end '{}' total_chunk_size '{}'", + __func__, target, rpc_in[i].path, rpc_in[i].offset, + rpc_in[i].chunk_n, rpc_in[i].chunk_start, rpc_in[i].chunk_end, + rpc_in[i].total_chunk_size); + ret = margo_create(PROXY_DATA->client_rpc_mid(), + PROXY_DATA->rpc_endpoints().at(target), + PROXY_DATA->rpc_client_ids().rpc_write_id, + &rpc_handles[i]); + if(ret != HG_SUCCESS) { + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + margo_bulk_free(bulk_handle); + return ::make_pair(EBUSY, 0); + } + // Send RPC + ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to send non-blocking rpc for path {} and recipient {}", + __func__, path, target); + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + margo_bulk_free(bulk_handle); + return ::make_pair(EBUSY, 0); + } + } + PROXY_DATA->log()->debug("{}() '{}' RPCs sent, waiting for reply ...", + __func__, target_n); + // Wait 
for RPC responses and then get response and add it to out_size which + // is the written size All potential outputs are served to free resources + // regardless of errors, although an errorcode is set. + ssize_t out_size = 0; + int err = 0; + for(uint64_t i = 0; i < target_n; i++) { + // XXX We might need a timeout here to not wait forever for an output + // that never comes? + ret = margo_wait(rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to wait for margo_request handle for path {} recipient {}", + __func__, path, targets[i]); + err = EBUSY; + } + // decode response + rpc_data_out_t out{}; + ret = margo_get_output(rpc_handles[i], &out); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to get rpc output for path {} recipient {}", + __func__, path, targets[i]); + err = EBUSY; + } + PROXY_DATA->log()->debug( + "{}() Got response from target '{}': err '{}' with io_size '{}'", + __func__, i, out.err, out.io_size); + if(out.err != 0) + err = out.err; + else + out_size += static_cast(out.io_size); + margo_free_output(rpc_handles[i], &out); + margo_destroy(rpc_handles[i]); + } + margo_bulk_free(bulk_handle); + return ::make_pair(err, out_size); +} + +std::pair +forward_read(const std::string& path, void* buf, const int64_t offset, + const size_t read_size) { + // import pow2-optimized arithmetic functions + using namespace gkfs::utils::arithmetic; + // TODO mostly copy pasta from forward_data on client w.r.t. chunking logic + // (actually old margo code pre-hermes) + hg_bulk_t bulk_handle = nullptr; + // register local target buffer for bulk access + auto bulk_buf = buf; + auto size = make_shared(read_size); // XXX Why shared ptr? 
+ auto ret = margo_bulk_create(PROXY_DATA->client_rpc_mid(), 1, &bulk_buf, + size.get(), HG_BULK_WRITE_ONLY, &bulk_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to create rpc bulk handle", + __func__); + return ::make_pair(EBUSY, 0); + } + + // Calculate chunkid boundaries and numbers so that daemons know in which + // interval to look for chunks + auto chnk_start = block_index(offset, gkfs::config::rpc::chunksize); + auto chnk_end = + block_index((offset + read_size - 1), gkfs::config::rpc::chunksize); + + // Collect all chunk ids within count that have the same destination so + // that those are send in one rpc bulk transfer + std::map> target_chnks{}; + // contains the recipient ids, used to access the target_chnks map. + // First idx is chunk with potential offset + std::vector targets{}; + + // targets for the first and last chunk as they need special treatment + uint64_t chnk_start_target = 0; + uint64_t chnk_end_target = 0; + + for(uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { + auto target = PROXY_DATA->distributor()->locate_data(path, chnk_id, 0); + + if(target_chnks.count(target) == 0) { + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); + targets.push_back(target); + } else { + target_chnks[target].push_back(chnk_id); + } + + // set first and last chnk targets + if(chnk_id == chnk_start) { + chnk_start_target = target; + } + + if(chnk_id == chnk_end) { + chnk_end_target = target; + } + } + + // some helper variables for async RPC + auto target_n = targets.size(); + vector rpc_handles(target_n); + vector rpc_waiters(target_n); + vector rpc_in(target_n); + // Issue non-blocking RPC requests and wait for the result later + for(uint64_t i = 0; i < target_n; i++) { + auto target = targets[i]; + auto total_chunk_size = + target_chnks[target].size() * gkfs::config::rpc::chunksize; + if(target == chnk_start_target) // receiver of first chunk must subtract + // the offset from first chunk + 
total_chunk_size -= + block_overrun(offset, gkfs::config::rpc::chunksize); + // receiver of last chunk must subtract + if(target == chnk_end_target && + !is_aligned(offset + read_size, gkfs::config::rpc::chunksize)) + total_chunk_size -= block_underrun(offset + read_size, + gkfs::config::rpc::chunksize); + + // Fill RPC input + rpc_in[i].path = path.c_str(); + rpc_in[i].offset = block_overrun( + offset, + gkfs::config::rpc::chunksize); // first offset in targets is the + // chunk with a potential offset + rpc_in[i].host_id = target; + rpc_in[i].host_size = PROXY_DATA->rpc_endpoints().size(); + rpc_in[i].chunk_n = + target_chnks[target] + .size(); // number of chunks handled by that destination + rpc_in[i].chunk_start = chnk_start; // chunk start id of this write + rpc_in[i].chunk_end = chnk_end; // chunk end id of this write + rpc_in[i].total_chunk_size = total_chunk_size; // total size to write + rpc_in[i].bulk_handle = bulk_handle; + PROXY_DATA->log()->trace( + "{}() Sending non-blocking RPC to '{}': path '{}' offset '{}' chunk_n '{}' chunk_start '{}' chunk_end '{}' total_chunk_size '{}'", + __func__, target, rpc_in[i].path, rpc_in[i].offset, + rpc_in[i].chunk_n, rpc_in[i].chunk_start, rpc_in[i].chunk_end, + rpc_in[i].total_chunk_size); + + ret = margo_create(PROXY_DATA->client_rpc_mid(), + PROXY_DATA->rpc_endpoints().at(target), + PROXY_DATA->rpc_client_ids().rpc_read_id, + &rpc_handles[i]); + if(ret != HG_SUCCESS) { + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + margo_bulk_free(bulk_handle); + return ::make_pair(EBUSY, 0); + } + // Send RPC + ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to send non-blocking rpc for path {} and recipient {}", + __func__, path, target); + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + margo_bulk_free(bulk_handle); + return ::make_pair(EBUSY, 0); + } + } + + 
PROXY_DATA->log()->debug("{}() '{}' RPCs sent, waiting for reply ...", + __func__, target_n); + // Wait for RPC responses and then get response and add it to out_size which + // is the written size All potential outputs are served to free resources + // regardless of errors, although an errorcode is set. + ssize_t out_size = 0; + int err = 0; + for(uint64_t i = 0; i < target_n; i++) { + // XXX We might need a timeout here to not wait forever for an output + // that never comes? + ret = margo_wait(rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to wait for margo_request handle for path {} recipient {}", + __func__, path, targets[i]); + err = EBUSY; + } + // decode response + rpc_data_out_t out{}; + ret = margo_get_output(rpc_handles[i], &out); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to get rpc output for path {} recipient {}", + __func__, path, targets[i]); + err = EBUSY; + } + PROXY_DATA->log()->debug( + "{}() Got response from target '{}': err '{}' with io_size '{}'", + __func__, i, out.err, out.io_size); + if(out.err != 0) + err = out.err; + else + out_size += static_cast(out.io_size); + margo_free_output(rpc_handles[i], &out); + margo_destroy(rpc_handles[i]); + } + margo_bulk_free(bulk_handle); + return ::make_pair(err, out_size); +} + +pair +forward_get_chunk_stat() { + int err = 0; + hg_return ret{}; + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); + + // some helper variables for async RPC + auto target_n = PROXY_DATA->hosts_size(); + vector rpc_handles(target_n); + vector rpc_waiters(target_n); + vector rpc_in(target_n); + for(uint64_t i = 0; i < target_n; i++) { + ret = margo_create(PROXY_DATA->client_rpc_mid(), + PROXY_DATA->rpc_endpoints().at(i), + PROXY_DATA->rpc_client_ids().rpc_chunk_stat_id, + &rpc_handles[i]); + // XXX Don't think this is useful here cause responds go into nothing + if(ret != HG_SUCCESS) { + for(uint64_t j = 0; j < i + 1; j++) 
{ + margo_destroy(rpc_handles[j]); + } + return ::make_pair(EBUSY, ChunkStat{}); + } + // Send RPC + rpc_in[i].dummy = 0; + ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to send non-blocking rpc for recipient {}", + __func__, i); + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + return ::make_pair(EBUSY, ChunkStat{}); + } + } + PROXY_DATA->log()->debug("{}() '{}' RPCs sent, waiting for reply ...", + __func__, target_n); + // Wait for RPC responses and then get response and add it to out_size which + // is the written size All potential outputs are served to free resources + // regardless of errors, although an errorcode is set. + unsigned long chunk_size = gkfs::config::rpc::chunksize; + unsigned long chunk_total = 0; + unsigned long chunk_free = 0; + for(uint64_t i = 0; i < target_n; i++) { + // XXX We might need a timeout here to not wait forever for an output + // that never comes? 
+ ret = margo_wait(rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to wait for margo_request handle for recipient {}", + __func__, i); + err = EBUSY; + } + // decode response + rpc_chunk_stat_out_t daemon_out{}; + ret = margo_get_output(rpc_handles[i], &daemon_out); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to get rpc output for recipient {}", __func__, + i); + err = EBUSY; + } + PROXY_DATA->log()->debug( + "{}() Got response from target '{}': err '{}' with chunk_total '{}' chunk_free '{}'", + __func__, i, daemon_out.err, daemon_out.chunk_total, + daemon_out.chunk_free); + if(daemon_out.err != 0) + err = daemon_out.err; + else { + chunk_total += daemon_out.chunk_total; + chunk_free += daemon_out.chunk_free; + } + margo_free_output(rpc_handles[i], &daemon_out); + margo_destroy(rpc_handles[i]); + } + if(err) + return make_pair(err, ChunkStat{}); + else + return make_pair(0, ChunkStat{chunk_size, chunk_total, chunk_free}); +} + +} // namespace gkfs::rpc \ No newline at end of file diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp new file mode 100644 index 000000000..abd34f4e9 --- /dev/null +++ b/src/proxy/rpc/forward_metadata.cpp @@ -0,0 +1,373 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include + +#include + +using namespace std; + +namespace { + +std::tuple +remove_metadata(const std::string& path) { + hg_handle_t rpc_handle = nullptr; + rpc_rm_node_in_t daemon_in{}; + rpc_rm_metadata_out_t daemon_out{}; + int err = 0; + int64_t size = 0; + uint32_t mode = 0; + // fill in + daemon_in.path = path.c_str(); + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); + auto endp = PROXY_DATA->rpc_endpoints().at( + PROXY_DATA->distributor()->locate_file_metadata(path, 0)); + auto ret = margo_create(PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_remove_id, + &rpc_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Critical error", __func__); + return make_tuple(EBUSY, 0, 0); + ; + } + ret = margo_forward(rpc_handle, &daemon_in); + if(ret == HG_SUCCESS) { + // Get response + PROXY_DATA->log()->trace("{}() Waiting for response", __func__); + ret = margo_get_output(rpc_handle, &daemon_out); + if(ret == HG_SUCCESS) { + PROXY_DATA->log()->debug("{}() Got response success: {}", __func__, + daemon_out.err); + mode = daemon_out.mode; + size = daemon_out.size; + err = daemon_out.err; + margo_free_output(rpc_handle, &daemon_out); + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() while getting rpc output", __func__); + } + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() Critical error", __func__); + } + + /* clean up resources consumed by this rpc */ + margo_destroy(rpc_handle); + return make_tuple(err, size, mode); +} + +int +remove_data(const std::string& path) { + int err = 0; + // Create handles + vector rpc_handles(PROXY_DATA->hosts_size()); + vector rpc_waiters(PROXY_DATA->hosts_size()); + vector rpc_in(PROXY_DATA->hosts_size()); + for(size_t i = 0; i < PROXY_DATA->hosts_size(); i++) { + rpc_in[i].path = path.c_str(); + PROXY_DATA->log()->trace( + "{}() Sending 
non-blocking RPC to '{}': path '{}' ", __func__, + i, rpc_in[i].path); + auto ret = margo_create(PROXY_DATA->client_rpc_mid(), + PROXY_DATA->rpc_endpoints().at(i), + PROXY_DATA->rpc_client_ids().rpc_remove_data_id, + &rpc_handles[i]); + if(ret != HG_SUCCESS) { + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + return EBUSY; + } + // Send RPC + ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to send non-blocking rpc for path {} and recipient {}", + __func__, path, i); + for(uint64_t j = 0; j < i + 1; j++) { + margo_destroy(rpc_handles[j]); + } + return EBUSY; + } + } + PROXY_DATA->log()->debug("{}() '{}' RPCs sent, waiting for reply ...", + __func__, PROXY_DATA->hosts_size()); + // Wait for RPC responses and then get response + for(uint64_t i = 0; i < PROXY_DATA->hosts_size(); i++) { + auto ret = margo_wait(rpc_waiters[i]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to wait for margo_request handle for path {} recipient {}", + __func__, path, i); + err = EBUSY; + } + // decode response + rpc_err_out_t out{}; + ret = margo_get_output(rpc_handles[i], &out); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to get rpc output for path {} recipient {}", + __func__, path, i); + err = EBUSY; + } + PROXY_DATA->log()->debug("{}() Got response from target '{}': err '{}'", + __func__, i, out.err); + if(out.err != 0) + err = out.err; + margo_free_output(rpc_handles[i], &out); + margo_destroy(rpc_handles[i]); + } + return err; +} +} // namespace + +namespace gkfs::rpc { + +int +forward_create(const std::string& path, const mode_t mode) { + hg_handle_t rpc_handle = nullptr; + rpc_mk_node_in_t daemon_in{}; + rpc_err_out_t daemon_out{}; + int err = 0; + // fill in + daemon_in.path = path.c_str(); + daemon_in.mode = mode; + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); + auto endp = 
PROXY_DATA->rpc_endpoints().at( + PROXY_DATA->distributor()->locate_file_metadata(path, 0)); + auto ret = margo_create(PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_create_id, + &rpc_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Critical error", __func__); + return EBUSY; + } + ret = margo_forward(rpc_handle, &daemon_in); + if(ret == HG_SUCCESS) { + // Get response + PROXY_DATA->log()->trace("{}() Waiting for response", __func__); + ret = margo_get_output(rpc_handle, &daemon_out); + if(ret == HG_SUCCESS) { + PROXY_DATA->log()->debug("{}() Got response success: {}", __func__, + daemon_out.err); + err = daemon_out.err; + margo_free_output(rpc_handle, &daemon_out); + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() while getting rpc output", __func__); + } + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() sending rpc failed", __func__); + } + + /* clean up resources consumed by this rpc */ + margo_destroy(rpc_handle); + return err; +} + +std::pair +forward_stat(const std::string& path) { + hg_handle_t rpc_handle = nullptr; + rpc_path_only_in_t daemon_in{}; + rpc_stat_out_t daemon_out{}; + int err = 0; + string attr{}; + // fill in + daemon_in.path = path.c_str(); + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); + auto endp = PROXY_DATA->rpc_endpoints().at( + PROXY_DATA->distributor()->locate_file_metadata(path, 0)); + auto ret = + margo_create(PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_stat_id, &rpc_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Critical error", __func__); + return make_pair(EBUSY, attr); + } + ret = margo_forward(rpc_handle, &daemon_in); + if(ret == HG_SUCCESS) { + // Get response + PROXY_DATA->log()->trace("{}() Waiting for response", __func__); + ret = margo_get_output(rpc_handle, &daemon_out); + if(ret == HG_SUCCESS) { + 
PROXY_DATA->log()->debug("{}() Got response success: {}", __func__, + daemon_out.err); + err = daemon_out.err; + if(err == 0) + attr = daemon_out.db_val; + margo_free_output(rpc_handle, &daemon_out); + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() while getting rpc output", __func__); + } + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() sending rpc failed", __func__); + } + + /* clean up resources consumed by this rpc */ + margo_destroy(rpc_handle); + return make_pair(err, attr); +} + +int +forward_remove(const std::string& path) { + auto [err, mode, size] = remove_metadata(path); + if(err != 0) { + return err; + } + // if file is not a regular file and it's size is 0, data does not need to + // be removed, thus, we exit + if(!(S_ISREG(mode) && (size != 0))) { + return 0; + } + return remove_data(path); +} + +pair +forward_update_metadentry_size(const string& path, const size_t size, + const off64_t offset, const bool append_flag) { + hg_handle_t rpc_handle = nullptr; + rpc_update_metadentry_size_in_t daemon_in{}; + rpc_update_metadentry_size_out_t daemon_out{}; + int err = 0; + off64_t ret_offset = 0; + // fill in + daemon_in.path = path.c_str(); + daemon_in.size = size; + daemon_in.offset = offset; + daemon_in.append = append_flag; + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); + auto endp = PROXY_DATA->rpc_endpoints().at( + PROXY_DATA->distributor()->locate_file_metadata(path, 0)); + auto ret = margo_create( + PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_update_metadentry_size_id, + &rpc_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Critical error", __func__); + return make_pair(EBUSY, 0); + } + ret = margo_forward(rpc_handle, &daemon_in); + if(ret == HG_SUCCESS) { + // Get response + PROXY_DATA->log()->trace("{}() Waiting for response", __func__); + ret = margo_get_output(rpc_handle, &daemon_out); + 
if(ret == HG_SUCCESS) { + PROXY_DATA->log()->debug( + "{}() Got response success: err {} ret_offset {}", __func__, + daemon_out.err, daemon_out.ret_offset); + err = daemon_out.err; + ret_offset = daemon_out.ret_offset; + margo_free_output(rpc_handle, &daemon_out); + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() while getting rpc output", __func__); + } + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() sending rpc failed", __func__); + } + + /* clean up resources consumed by this rpc */ + margo_destroy(rpc_handle); + return make_pair(err, ret_offset); +} + +pair +forward_get_dirents_single(const std::string& path, int server, void* buf, + size_t bulk_size) { + hg_bulk_t bulk_handle = nullptr; + hg_handle_t rpc_handle = nullptr; + rpc_get_dirents_in_t daemon_in{}; + // register local target buffer for bulk access + auto* bulk_buf = buf; + auto size = make_shared(bulk_size); // XXX Why shared ptr? + auto ret = margo_bulk_create(PROXY_DATA->client_rpc_mid(), 1, &bulk_buf, + size.get(), HG_BULK_WRITE_ONLY, &bulk_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to create rpc bulk handle", + __func__); + return ::make_pair(EBUSY, 0); + } + daemon_in.path = path.c_str(); + daemon_in.bulk_handle = bulk_handle; + auto* endp = PROXY_DATA->rpc_endpoints().at(server); + ret = margo_create(PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_get_dirents_extended_id, + &rpc_handle); + if(ret != HG_SUCCESS) { + margo_destroy(rpc_handle); + margo_bulk_free(bulk_handle); + return ::make_pair(EBUSY, 0); + } + // Send RPC + margo_request rpc_waiter{}; + ret = margo_iforward(rpc_handle, &daemon_in, &rpc_waiter); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to send non-blocking rpc for path {} and recipient {}", + __func__, path, server); + margo_destroy(rpc_handle); + margo_bulk_free(bulk_handle); + return ::make_pair(EBUSY, 0); + } + 
PROXY_DATA->log()->debug("{}() 1 RPC sent, waiting for reply ...", + __func__); + int err = 0; + size_t dirents_size = 0; + ret = margo_wait(rpc_waiter); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to wait for margo_request handle for path {} recipient {}", + __func__, path, server); + err = EBUSY; + } + // decode response + rpc_get_dirents_out_t daemon_out{}; + ret = margo_get_output(rpc_handle, &daemon_out); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to get rpc output for path {} recipient {}", + __func__, path, server); + err = EBUSY; + } + PROXY_DATA->log()->debug( + "{}() Got response from target '{}': err '{}' with dirent_size '{}'", + __func__, server, daemon_out.err, daemon_out.dirents_size); + if(daemon_out.err != 0) + err = daemon_out.err; + else + dirents_size = daemon_out.dirents_size; + margo_free_output(rpc_handle, &daemon_out); + margo_destroy(rpc_handle); + margo_bulk_free(bulk_handle); + return ::make_pair(err, dirents_size); +} + +} // namespace gkfs::rpc diff --git a/src/proxy/rpc/srv_data.cpp b/src/proxy/rpc/srv_data.cpp new file mode 100644 index 000000000..49549e367 --- /dev/null +++ b/src/proxy/rpc/srv_data.cpp @@ -0,0 +1,207 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include +#include + +#include + +using namespace std; + +/** + * RPC handler for an incoming write RPC + * @param handle + * @return + */ +static hg_return_t +proxy_rpc_srv_write(hg_handle_t handle) { + + rpc_client_proxy_write_in_t client_in{}; + rpc_data_out_t client_out{}; + client_out.err = EIO; + client_out.io_size = 0; + hg_bulk_t bulk_handle = nullptr; + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + client_out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + + auto hgi = margo_get_info(handle); + auto mid = margo_hg_handle_get_instance(handle); + auto bulk_size = margo_bulk_get_size(client_in.bulk_handle); + assert(bulk_size == client_in.write_size); + PROXY_DATA->log()->debug( + "{}() Got RPC with path '{}' bulk_size '{}' == write_size '{}'", + __func__, client_in.path, bulk_size, client_in.write_size); + /* + * Set up buffer and pull from client + */ + void* bulk_buf; // buffer for bulk transfer + // create bulk handle and allocated memory for buffer with buf_sizes + // information + ret = margo_bulk_create(mid, 1, nullptr, &bulk_size, HG_BULK_READWRITE, + &bulk_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to create bulk handle", __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + // access the internally allocated memory buffer and put it into buf_ptrs + uint32_t actual_count; // number of segments. 
we use one here because we + // pull the whole buffer at once + ret = margo_bulk_access(bulk_handle, 0, bulk_size, HG_BULK_READWRITE, 1, + &bulk_buf, &bulk_size, &actual_count); + if(ret != HG_SUCCESS || actual_count != 1) { + PROXY_DATA->log()->error( + "{}() Failed to access allocated buffer from bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + // pull data from client here + ret = margo_bulk_transfer(mid, HG_BULK_PULL, hgi->addr, + client_in.bulk_handle, 0, bulk_handle, 0, + bulk_size); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to pull data from client for path '{}' with size '{}'", + __func__, client_in.path, bulk_size); + client_out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + + // Forward request to daemon, using bulk_buf, containing the pulled data + // (which is pulled again by the daemon) + auto daemon_out = gkfs::rpc::forward_write(client_in.path, bulk_buf, + client_in.offset, bulk_size); + client_out.err = daemon_out.first; + client_out.io_size = daemon_out.second; + PROXY_DATA->log()->debug("{}() Sending output err '{}' io_size '{}'", + __func__, client_out.err, client_out.io_size); + + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_write) + +static hg_return_t +proxy_rpc_srv_read(hg_handle_t handle) { + rpc_client_proxy_read_in_t client_in{}; + rpc_data_out_t client_out{}; + client_out.err = EIO; + client_out.io_size = 0; + hg_bulk_t bulk_handle = nullptr; + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + client_out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + + auto hgi = margo_get_info(handle); + auto mid = margo_hg_handle_get_instance(handle); + auto 
bulk_size = margo_bulk_get_size(client_in.bulk_handle); + assert(bulk_size == client_in.read_size); + PROXY_DATA->log()->debug( + "{}() Got RPC with path '{}' bulk_size '{}' == read_size '{}'", + __func__, client_in.path, bulk_size, client_in.read_size); + /* + * Set up buffer for push from daemon + */ + void* bulk_buf; // buffer for bulk transfer + // create bulk handle and allocated memory for buffer with buf_sizes + // information + ret = margo_bulk_create(mid, 1, nullptr, &bulk_size, HG_BULK_READWRITE, + &bulk_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to create bulk handle", __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + // access the internally allocated memory buffer + uint32_t actual_count; // number of segments. we use one here because we + // pull the whole buffer at once + ret = margo_bulk_access(bulk_handle, 0, bulk_size, HG_BULK_READWRITE, 1, + &bulk_buf, &bulk_size, &actual_count); + if(ret != HG_SUCCESS || actual_count != 1) { + PROXY_DATA->log()->error( + "{}() Failed to access allocated buffer from bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + // Forward request to daemon, using bulk_buf, containing the allocated + // buffer (which is pushed the data by the daemon) + auto daemon_out = gkfs::rpc::forward_read(client_in.path, bulk_buf, + client_in.offset, bulk_size); + if(daemon_out.first != 0) { + PROXY_DATA->log()->error( + "{}() Failure when forwarding to daemon with err '{}' and iosize '{}'", + __func__, daemon_out.first, daemon_out.second); + client_out.err = daemon_out.first; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + // Push data to client here if no error was reported by the daemon + ret = margo_bulk_transfer(mid, HG_BULK_PUSH, hgi->addr, + client_in.bulk_handle, 0, bulk_handle, 0, + bulk_size); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( 
+ "{}() Failed to push data from client for path '{}' with size '{}'", + __func__, client_in.path, bulk_size); + client_out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + + client_out.err = daemon_out.first; + client_out.io_size = daemon_out.second; + PROXY_DATA->log()->debug("{}() Sending output err '{}' io_size '{}'", + __func__, client_out.err, client_out.io_size); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) + +static hg_return_t +proxy_rpc_srv_chunk_stat(hg_handle_t handle) { + rpc_chunk_stat_in_t client_in{}; + rpc_chunk_stat_out_t client_out{}; + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() Got chunk stat RPC ", __func__); + + auto daemon_out = gkfs::rpc::forward_get_chunk_stat(); + client_out.err = daemon_out.first; + client_out.chunk_free = daemon_out.second.chunk_free; + client_out.chunk_total = daemon_out.second.chunk_total; + client_out.chunk_size = daemon_out.second.chunk_size; + + PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, + client_out.err); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_chunk_stat) \ No newline at end of file diff --git a/src/proxy/rpc/srv_metadata.cpp b/src/proxy/rpc/srv_metadata.cpp new file mode 100644 index 000000000..6c6277a5b --- /dev/null +++ b/src/proxy/rpc/srv_metadata.cpp @@ -0,0 +1,207 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
+ + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include +#include + +#include + +static hg_return_t +proxy_rpc_srv_create(hg_handle_t handle) { + rpc_mk_node_in_t client_in{}; + rpc_err_out_t client_out{}; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() Got RPC with path '{}'", __func__, + client_in.path); + + client_out.err = gkfs::rpc::forward_create(client_in.path, client_in.mode); + + PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, + client_out.err); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_create) + +static hg_return_t +proxy_rpc_srv_stat(hg_handle_t handle) { + rpc_path_only_in_t client_in{}; + rpc_stat_out_t client_out{}; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() Got RPC with path '{}'", __func__, + client_in.path); + + auto out = gkfs::rpc::forward_stat(client_in.path); + client_out.err = out.first; + client_out.db_val = out.second.c_str(); + + PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, + client_out.err); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_stat) + +static hg_return_t +proxy_rpc_srv_remove(hg_handle_t handle) { + rpc_rm_node_in_t client_in{}; + rpc_err_out_t client_out{}; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve 
input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() Got RPC with path '{}'", __func__, + client_in.path); + client_out.err = gkfs::rpc::forward_remove(client_in.path); + + PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, + client_out.err); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_remove) + +static hg_return_t +proxy_rpc_srv_update_metadentry_size(hg_handle_t handle) { + + rpc_update_metadentry_size_in_t client_in{}; + rpc_update_metadentry_size_out_t client_out{}; + + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug( + "{}() path: '{}', size: '{}', offset: '{}', append: '{}'", __func__, + client_in.path, client_in.size, client_in.offset, client_in.append); + + try { + auto [err, ret_offset] = gkfs::rpc::forward_update_metadentry_size( + client_in.path, client_in.size, client_in.offset, + client_in.append); + + client_out.err = 0; + client_out.ret_offset = ret_offset; + } catch(const std::exception& e) { + PROXY_DATA->log()->error( + "{}() Failed to update metadentry size RPC: '{}'", __func__, + e.what()); + client_out.err = EBUSY; + } + + PROXY_DATA->log()->debug("{}() Sending output err '{}' ret_offset '{}'", + __func__, client_out.err, client_out.ret_offset); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_update_metadentry_size) + +static hg_return_t +proxy_rpc_srv_get_dirents_extended(hg_handle_t handle) { + + rpc_proxy_get_dirents_in_t client_in{}; + rpc_get_dirents_out_t client_out{}; + hg_bulk_t bulk_handle = nullptr; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + 
PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() path: '{}', server: '{}'", __func__, + client_in.path, client_in.server); + + auto hgi = margo_get_info(handle); + auto mid = margo_hg_handle_get_instance(handle); + auto bulk_size = margo_bulk_get_size(client_in.bulk_handle); + PROXY_DATA->log()->debug("{}() Got RPC with path '{}' bulk_size '{}'", + __func__, client_in.path, bulk_size); + /* + * Set up buffer for push from daemon + */ + void* bulk_buf; // buffer for bulk transfer + // create bulk handle and allocate memory for buffer with buf_sizes + // information + ret = margo_bulk_create(mid, 1, nullptr, &bulk_size, HG_BULK_READWRITE, + &bulk_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to create bulk handle", __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + // access the internally allocated memory buffer + uint32_t actual_count; // number of segments. 
we use one here because we + // pull the whole buffer at once + ret = margo_bulk_access(bulk_handle, 0, bulk_size, HG_BULK_READWRITE, 1, + &bulk_buf, &bulk_size, &actual_count); + if(ret != HG_SUCCESS || actual_count != 1) { + PROXY_DATA->log()->error( + "{}() Failed to access allocated buffer from bulk handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + // Forward request to daemon, using bulk_buf, containing the allocated + // buffer (which is pushed the data by the daemon) + auto daemon_out = gkfs::rpc::forward_get_dirents_single( + client_in.path, client_in.server, bulk_buf, bulk_size); + if(daemon_out.first != 0) { + PROXY_DATA->log()->error( + "{}() Failure when forwarding to daemon with err '{}'", + __func__, daemon_out.first); + client_out.err = daemon_out.first; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + // Push data to client here if no error was reported by the daemon + ret = margo_bulk_transfer(mid, HG_BULK_PUSH, hgi->addr, + client_in.bulk_handle, 0, bulk_handle, 0, + bulk_size); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Failed to push data from client for path '{}' with size '{}'", + __func__, client_in.path, bulk_size); + client_out.err = EBUSY; + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); + } + + client_out.err = daemon_out.first; + client_out.dirents_size = daemon_out.second; + PROXY_DATA->log()->debug("{}() Sending output err '{}' dirents_size '{}'", + __func__, client_out.err, client_out.dirents_size); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out, + &bulk_handle); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_get_dirents_extended) diff --git a/src/proxy/util.cpp b/src/proxy/util.cpp new file mode 100644 index 000000000..2a85376bf --- /dev/null +++ b/src/proxy/util.cpp @@ -0,0 +1,246 @@ +/* + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), 
Spain + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +} + +using namespace std; +namespace fs = std::filesystem; + +namespace { + +vector> +load_hostfile(const std::string& lfpath) { + + + PROXY_DATA->log()->debug("{}() Loading hosts file: '{}'", __func__, lfpath); + + ifstream lf(lfpath); + if(!lf) { + throw runtime_error(fmt::format("Failed to open hosts file '{}': {}", + lfpath, strerror(errno))); + } + vector> hosts; + const regex line_re("^(\\S+)\\s+(\\S+)\\s*(\\S*)$", + regex::ECMAScript | regex::optimize); + string line; + string host; + string uri; + std::smatch match; + while(getline(lf, line)) { + if(!regex_match(line, match, line_re)) { + PROXY_DATA->log()->debug( + "{}() Unrecognized line format: [path: '{}', line: '{}']", + __func__, lfpath, line); + + throw runtime_error( + fmt::format("unrecognized line format: '{}'", line)); + } + host = match[1]; + if(match.size() < 3) { + throw runtime_error(fmt::format( + "hostfile does not have three columns for daemon RPC proxy server")); + } + uri = match[3]; + if(!PROXY_DATA->use_auto_sm() && + uri.find("na+sm") != std::string::npos) { + PROXY_DATA->use_auto_sm(true); + PROXY_DATA->log()->info( + "{}() auto_sm detected in daemon hosefile. 
Enabling it on proxy ...", + __func__); + } + + hosts.emplace_back(host, uri); + } + return hosts; +} +} // namespace + +namespace gkfs::util { + +bool +is_proxy_already_running() { + const auto& pid_path = PROXY_DATA->pid_file_path(); + + // check if another proxy is already running + if(fs::exists(pid_path)) { + ifstream ifs(pid_path, ::ifstream::in); + if(ifs) { + string running_pid{}; + if(getline(ifs, running_pid) && !running_pid.empty()) { + // check if process exists without killing it. Signal 0 doesn't + // kill + if(0 == ::kill(::stoi(running_pid), 0)) + return true; + } + } else { + throw runtime_error( + "FATAL: pid file of another proxy already exists, but cannot be opened. Exiting ..."); + } + ifs.close(); + fs::remove(pid_path); + } + return false; +} + +/** + * Create pid file with na+sm address. + * At the moment this is not NUMA-aware. + * E.g., if two PSM2 devices exist, one on each socket, it would be best to use + * two proxies but since PSM2_MULTIRAIL 2 doesn't work properly, this is a + * future TODO + */ +void +create_proxy_pid_file() { + /* + * - na+sm pid address + * - file name socket (numa node) getcpu() call in #include + * - only allow one per socket + */ + const auto& pid_path = PROXY_DATA->pid_file_path(); + auto my_pid = getpid(); + if(my_pid == -1) { + throw runtime_error("Unable to get own pid for proxy pid file"); + } + ofstream ofs(pid_path, ::ofstream::trunc); + if(ofs) { + ofs << to_string(my_pid); + ofs << "\n"; + ofs << PROXY_DATA->server_self_addr(); + } else { + throw runtime_error("Unable to create proxy pid file"); + } +} + +void +remove_proxy_pid_file() { + const auto& pid_path = PROXY_DATA->pid_file_path(); + fs::remove(pid_path); +} + +bool +check_for_hosts_file(const std::string& hostfile) { + return fs::exists(hostfile); +} + + +vector> +read_hosts_file(const std::string& hostfile) { + + vector> hosts; + try { + hosts = load_hostfile(hostfile); + } catch(const exception& e) { + auto emsg = fmt::format("Failed to load 
hosts file: {}", e.what()); + throw runtime_error(emsg); + } + + if(hosts.empty()) { + throw runtime_error(fmt::format("Hostfile empty: '{}'", hostfile)); + } + + PROXY_DATA->log()->info("{}() Daemon pool size: '{}'", __func__, + hosts.size()); + + return hosts; +} + +void +connect_to_hosts(const vector>& hosts) { + auto local_hostname = gkfs::rpc::get_my_hostname(true); + bool local_host_found = false; + + PROXY_DATA->hosts_size(hosts.size()); + vector host_ids(hosts.size()); + // populate vector with [0, ..., host_size - 1] + ::iota(::begin(host_ids), ::end(host_ids), 0); + /* + * Shuffle hosts to balance addr lookups to all hosts + * Too many concurrent lookups send to same host + * could overwhelm the server, + * returning error when addr lookup + */ + ::random_device rd; // obtain a random number from hardware + ::mt19937 g(rd()); // seed the random generator + ::shuffle(host_ids.begin(), host_ids.end(), g); // Shuffle hosts vector + // lookup addresses and put abstract server addresses into rpc_addresses + for(const auto& id : host_ids) { + const auto& hostname = hosts.at(id).first; + const auto& uri = hosts.at(id).second; + + hg_addr_t svr_addr = HG_ADDR_NULL; + + // try to look up 4 times before erroring out + hg_return_t ret; + for(uint32_t i = 0; i < 4; i++) { + ret = margo_addr_lookup(PROXY_DATA->client_rpc_mid(), uri.c_str(), + &svr_addr); + if(ret != HG_SUCCESS) { + // still not working after 4 tries. 
+ if(i == 4) { + auto err_msg = + fmt::format("{}() Unable to lookup address '{}'", + __func__, uri); + throw runtime_error(err_msg); + } + // Wait a random amount of time and try again + ::mt19937 eng(rd()); // seed the random generator + ::uniform_int_distribution<> distr( + 50, 50 * (i + 2)); // define the range + ::this_thread::sleep_for(std::chrono::milliseconds(distr(eng))); + } else { + break; + } + } + if(svr_addr == HG_ADDR_NULL) { + auto err_msg = fmt::format( + "{}() looked up address is NULL for address '{}'", __func__, + uri); + throw runtime_error(err_msg); + } + PROXY_DATA->rpc_endpoints().insert(make_pair(id, svr_addr)); + + if(!local_host_found && hostname == local_hostname) { + PROXY_DATA->log()->debug("{}() Found local host: {}", __func__, + hostname); + PROXY_DATA->local_host_id(id); + local_host_found = true; + } + PROXY_DATA->log()->debug("{}() Found daemon: id '{}' uri '{}'", + __func__, id, uri); + } + + if(!local_host_found) { + PROXY_DATA->log()->warn( + "{}() Failed to find local host. 
Using host '0' as local host", + __func__); + PROXY_DATA->local_host_id(0); + } +} + +} // namespace gkfs::util \ No newline at end of file -- GitLab From 853ff94fa7f9b78a5e7b91c3ad1717287a81f088 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 22 Apr 2024 15:53:38 +0200 Subject: [PATCH 02/24] Added Proxy truncate() support for Client <-> Proxy --- include/client/rpc/forward_data_proxy.hpp | 4 + include/client/rpc/rpc_types.hpp | 140 +++++++++++++++++++++- include/common/common_defs.hpp | 1 + include/common/rpc/rpc_types.hpp | 4 +- include/config.hpp | 1 + include/proxy/rpc/rpc_defs.hpp | 2 + src/client/gkfs_functions.cpp | 9 +- src/client/rpc/forward_data_proxy.cpp | 35 +++++- src/client/rpc/rpc_types.cpp | 2 + src/proxy/proxy.cpp | 3 + src/proxy/rpc/srv_data.cpp | 116 ++++++++++++++++++ 11 files changed, 304 insertions(+), 13 deletions(-) diff --git a/include/client/rpc/forward_data_proxy.hpp b/include/client/rpc/forward_data_proxy.hpp index 52659eab8..6f8c3f8f0 100644 --- a/include/client/rpc/forward_data_proxy.hpp +++ b/include/client/rpc/forward_data_proxy.hpp @@ -26,6 +26,10 @@ std::pair forward_read_proxy(const std::string& path, void* buf, off64_t offset, size_t read_size); +int +forward_truncate_proxy(const std::string& path, size_t current_size, + size_t new_size); + std::pair forward_get_chunk_stat_proxy(); diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index 4f8567550..a34ed9496 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -2574,6 +2574,134 @@ struct read_data_proxy { }; }; +//============================================================================== +// definitions for chunk_stat_proxy +struct trunc_data_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = trunc_data_proxy; + using handle_type = hermes::rpc_handle; + using 
input_type = input; + using output_type = output; + using mercury_input_type = rpc_client_proxy_trunc_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 22; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_truncate; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_client_proxy_trunc_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, uint64_t current_size, uint64_t length) + : m_path(path), m_current_size(current_size), m_length(length) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + current_size() const { + return m_current_size; + } + + uint64_t + length() const { + return m_length; + } + + explicit input(const rpc_client_proxy_trunc_in_t& other) + : m_path(other.path), m_current_size(other.current_size), + m_length(other.length) {} + + explicit + operator rpc_client_proxy_trunc_in_t() { + return { + m_path.c_str(), + m_current_size, + m_length, + }; + } + + private: + std::string m_path; + uint64_t m_current_size; + uint64_t m_length; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err() 
{} + + output(int32_t err) : m_err(err) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + //============================================================================== // definitions for chunk_stat_proxy struct chunk_stat_proxy { @@ -2594,7 +2722,7 @@ struct chunk_stat_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 22; + constexpr static const uint64_t public_id = 23; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -2728,7 +2856,7 @@ struct create_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 23; + constexpr static const uint64_t public_id = 24; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -2845,7 +2973,7 @@ struct stat_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 24; + constexpr static const uint64_t public_id = 25; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -2965,7 +3093,7 @@ struct remove_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 25; + constexpr static const uint64_t public_id = 26; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -3074,7 +3202,7 @@ struct update_metadentry_size_proxy { // RPC public 
identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 26; + constexpr static const uint64_t public_id = 27; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -3213,7 +3341,7 @@ struct get_dirents_extended_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 27; + constexpr static const uint64_t public_id = 28; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; diff --git a/include/common/common_defs.hpp b/include/common/common_defs.hpp index 8f1630092..9b6c79d28 100644 --- a/include/common/common_defs.hpp +++ b/include/common/common_defs.hpp @@ -69,6 +69,7 @@ constexpr auto client_proxy_update_size = "proxy_rpc_srv_update_metadentry_size"; constexpr auto client_proxy_write = "proxy_rpc_srv_write_data"; constexpr auto client_proxy_read = "proxy_rpc_srv_read_data"; +constexpr auto client_proxy_truncate = "proxy_rpc_srv_truncate"; constexpr auto client_proxy_chunk_stat = "proxy_rpc_srv_chunk_stat"; constexpr auto client_proxy_get_dirents_extended = "proxy_rpc_srv_get_dirents_extended"; diff --git a/include/common/rpc/rpc_types.hpp b/include/common/rpc/rpc_types.hpp index 8e4eacf52..e247d14ac 100644 --- a/include/common/rpc/rpc_types.hpp +++ b/include/common/rpc/rpc_types.hpp @@ -136,7 +136,9 @@ MERCURY_GEN_PROC(rpc_client_proxy_read_in_t, ((hg_const_string_t) (path))( (int64_t) (offset)) // file offset, NOT chunk offset ((hg_uint64_t) (read_size))((hg_bulk_t) (bulk_handle))) - +MERCURY_GEN_PROC(rpc_client_proxy_trunc_in_t, + ((hg_const_string_t) (path))((hg_uint64_t) (current_size))( + (hg_uint64_t) (length))) // proxy <-> daemon MERCURY_GEN_PROC( diff --git a/include/config.hpp b/include/config.hpp index a2ba8ecf7..0f3d2a0e1 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ 
-124,6 +124,7 @@ constexpr auto fwd_stat = true; constexpr auto fwd_remove = true; constexpr auto fwd_update_size = true; constexpr auto fwd_io = true; +constexpr auto fwd_truncate = true; constexpr auto fwd_chunk_stat = true; constexpr auto fwd_get_dirents_single = true; // Only use proxy for io if write/read size is higher than set value diff --git a/include/proxy/rpc/rpc_defs.hpp b/include/proxy/rpc/rpc_defs.hpp index e2d83e9e3..36721d023 100644 --- a/include/proxy/rpc/rpc_defs.hpp +++ b/include/proxy/rpc/rpc_defs.hpp @@ -33,6 +33,8 @@ DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_write) +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_truncate) + DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_chunk_stat) DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_get_dirents_extended) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 0a4c14264..e1ea22e83 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -768,8 +768,13 @@ gkfs_truncate(const std::string& path, off_t old_size, off_t new_size) { } } - auto err = gkfs::rpc::forward_truncate(path, old_size, new_size, - CTX->get_replicas()); + int err = 0; + if(gkfs::config::proxy::fwd_truncate && CTX->use_proxy()) { + err = gkfs::rpc::forward_truncate_proxy(path, old_size, new_size); + } else { + err = gkfs::rpc::forward_truncate(path, old_size, new_size, + CTX->get_replicas()); + } if(err) { LOG(DEBUG, "Failed to truncate data"); errno = err; diff --git a/src/client/rpc/forward_data_proxy.cpp b/src/client/rpc/forward_data_proxy.cpp index fc25df982..00d1251b5 100644 --- a/src/client/rpc/forward_data_proxy.cpp +++ b/src/client/rpc/forward_data_proxy.cpp @@ -72,8 +72,8 @@ forward_write_proxy(const string& path, const void* buf, off64_t offset, gkfs::rpc::write_data_proxy::input in(path, offset, write_size, local_buffers); - LOG(DEBUG, "proxy-host: {}, path: '{}', size: {}, offset: {}", endp.to_string(), - path, in.write_size(), in.offset()); + 
LOG(DEBUG, "proxy-host: {}, path: '{}', size: {}, offset: {}", + endp.to_string(), path, in.write_size(), in.offset()); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that // we can retry for RPC_TRIES (see old commits with margo) @@ -135,8 +135,8 @@ forward_read_proxy(const string& path, void* buf, const off64_t offset, gkfs::rpc::read_data_proxy::input in(path, offset, read_size, local_buffers); - LOG(DEBUG, "proxy-host: {}, path: '{}', size: {}, offset: {}", endp.to_string(), - path, in.read_size(), in.offset()); + LOG(DEBUG, "proxy-host: {}, path: '{}', size: {}, offset: {}", + endp.to_string(), path, in.read_size(), in.offset()); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that // we can retry for RPC_TRIES (see old commits with margo) @@ -165,6 +165,33 @@ forward_read_proxy(const string& path, void* buf, const off64_t offset, return make_pair(0, out_size); } +int +forward_truncate_proxy(const std::string& path, size_t current_size, + size_t new_size) { + auto endp = CTX->proxy_host(); + gkfs::rpc::trunc_data_proxy::input in(path, current_size, new_size); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + + auto out = ld_proxy_service->post(endp, in) + .get() + .at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + return out.err() ? 
out.err() : 0; + + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } +} + pair forward_get_chunk_stat_proxy() { auto endp = CTX->proxy_host(); diff --git a/src/client/rpc/rpc_types.cpp b/src/client/rpc/rpc_types.cpp index 0b5c4869d..d97e2c6df 100644 --- a/src/client/rpc/rpc_types.cpp +++ b/src/client/rpc/rpc_types.cpp @@ -65,6 +65,8 @@ hermes::detail::register_user_request_types(uint32_t provider_id) { provider_id); (void) registered_requests().add( provider_id); + (void) registered_requests().add( + provider_id); (void) registered_requests().add( provider_id); (void) registered_requests().add(provider_id); diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index f59dc955f..f72fca744 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -45,6 +45,9 @@ register_server_ipcs(margo_instance_id mid) { MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_read, rpc_client_proxy_read_in_t, rpc_data_out_t, proxy_rpc_srv_read) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_truncate, + rpc_client_proxy_trunc_in_t, rpc_err_out_t, + proxy_rpc_srv_truncate) MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_chunk_stat, rpc_chunk_stat_in_t, rpc_chunk_stat_out_t, proxy_rpc_srv_chunk_stat) diff --git a/src/proxy/rpc/srv_data.cpp b/src/proxy/rpc/srv_data.cpp index 49549e367..65078ceb2 100644 --- a/src/proxy/rpc/srv_data.cpp +++ b/src/proxy/rpc/srv_data.cpp @@ -181,6 +181,122 @@ proxy_rpc_srv_read(hg_handle_t handle) { DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) +/** + * RPC handler for an incoming write RPC + * @param handle + * @return + */ +static hg_return_t +proxy_rpc_srv_truncate(hg_handle_t handle) { + rpc_client_proxy_trunc_in_t client_in{}; + rpc_err_out_t client_out{}; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + 
PROXY_DATA->log()->debug( + "{}() Got RPC with path '{}' current_size '{}' length '{}'", + __func__, client_in.path, client_in.current_size, client_in.length); + client_out.err = EIO; + // client_out.err = gkfs::rpc::forward_create(client_in.path, + // client_in.mode); + + PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, + client_out.err); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_truncate) + + +// int trunc_data(const std::string& path, size_t current_size, size_t new_size) +// { +// assert(current_size > new_size); +// hg_return_t ret; +// rpc_trunc_in_t in; +// in.path = path.c_str(); +// in.length = new_size; +// +// bool error = false; +// +// // Find out which data server needs to delete chunks in order to contact +// only them const unsigned int chunk_start = chnk_id_for_offset(new_size, +// CHUNKSIZE); const unsigned int chunk_end = +// chnk_id_for_offset(current_size - new_size - 1, CHUNKSIZE); +// std::unordered_set hosts; +// for(unsigned int chunk_id = chunk_start; chunk_id <= chunk_end; +// ++chunk_id) { +// hosts.insert(CTX->distributor()->locate_data(path, chunk_id)); +// } +// +// std::vector rpc_handles(hosts.size()); +// std::vector rpc_waiters(hosts.size()); +// unsigned int req_num = 0; +// for (const auto& host: hosts) { +// ret = margo_create_wrap_helper(rpc_trunc_data_id, host, +// rpc_handles[req_num]); if (ret != HG_SUCCESS) { +// CTX->log()->error("{}() Unable to create Mercury handle for host: +// ", __func__, host); break; +// } +// +// // send async rpc +// ret = margo_iforward(rpc_handles[req_num], &in, +// &rpc_waiters[req_num]); if (ret != HG_SUCCESS) { +// CTX->log()->error("{}() Failed to send request to host: {}", +// __func__, host); break; +// } +// ++req_num; +// } +// +// if(req_num < hosts.size()) { +// // An error occurred. Cleanup and return +// CTX->log()->error("{}() Error -> sent only some requests {}/{}. 
+// Cancelling request...", __func__, req_num, hosts.size()); +// for(unsigned int i = 0; i < req_num; ++i) { +// margo_destroy(rpc_handles[i]); +// } +// errno = EIO; +// return -1; +// } +// +// assert(req_num == hosts.size()); +// // Wait for RPC responses and then get response +// rpc_err_out_t out{}; +// for (unsigned int i = 0; i < hosts.size(); ++i) { +// ret = margo_wait(rpc_waiters[i]); +// if (ret == HG_SUCCESS) { +// ret = margo_get_output(rpc_handles[i], &out); +// if (ret == HG_SUCCESS) { +// if(out.err){ +// CTX->log()->error("{}() received error response: {}", +// __func__, out.err); error = true; +// } +// } else { +// // Get output failed +// CTX->log()->error("{}() while getting rpc output", __func__); +// error = true; +// } +// } else { +// // Wait failed +// CTX->log()->error("{}() Failed while waiting for response", +// __func__); error = true; +// } +// +// /* clean up resources consumed by this rpc */ +// margo_free_output(rpc_handles[i], &out); +// margo_destroy(rpc_handles[i]); +// } +// +// if(error) { +// errno = EIO; +// return -1; +// } +// return 0; +// } + static hg_return_t proxy_rpc_srv_chunk_stat(hg_handle_t handle) { rpc_chunk_stat_in_t client_in{}; -- GitLab From 9978d338642cffb4edf90ae2f12f521107f3ec96 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 22 Apr 2024 15:54:43 +0200 Subject: [PATCH 03/24] Added Proxy truncate() support for Proxy <-> Daemon --- include/proxy/proxy_data.hpp | 1 + include/proxy/rpc/forward_data.hpp | 3 + src/daemon/daemon.cpp | 2 + src/proxy/proxy.cpp | 2 + src/proxy/rpc/forward_data.cpp | 103 +++++++++++++++++++++++++++++ src/proxy/rpc/srv_data.cpp | 92 +------------------------- 6 files changed, 114 insertions(+), 89 deletions(-) diff --git a/include/proxy/proxy_data.hpp b/include/proxy/proxy_data.hpp index 7122a449f..3b7fbc270 100644 --- a/include/proxy/proxy_data.hpp +++ b/include/proxy/proxy_data.hpp @@ -32,6 +32,7 @@ struct margo_client_ids { hg_id_t rpc_update_metadentry_size_id; hg_id_t 
rpc_write_id; hg_id_t rpc_read_id; + hg_id_t rpc_truncate_id; hg_id_t rpc_chunk_stat_id; hg_id_t rpc_get_dirents_extended_id; }; diff --git a/include/proxy/rpc/forward_data.hpp b/include/proxy/rpc/forward_data.hpp index 85f3cd63a..68efc0762 100644 --- a/include/proxy/rpc/forward_data.hpp +++ b/include/proxy/rpc/forward_data.hpp @@ -27,6 +27,9 @@ std::pair forward_read(const std::string& path, void* buf, int64_t offset, size_t read_size); +int +forward_truncate(const std::string& path, size_t current_size, size_t new_size); + std::pair forward_get_chunk_stat(); diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 9d0180632..5a55ab0a5 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -258,6 +258,8 @@ register_proxy_server_rpcs(margo_instance_id mid) { MARGO_REGISTER(mid, gkfs::rpc::tag::get_dirents_extended, rpc_get_dirents_in_t, rpc_get_dirents_out_t, rpc_srv_get_dirents_extended); + MARGO_REGISTER(mid, gkfs::rpc::tag::truncate, rpc_trunc_in_t, rpc_err_out_t, + rpc_srv_truncate); // proxy daemon specific RPCs MARGO_REGISTER(mid, gkfs::rpc::tag::proxy_daemon_write, rpc_proxy_daemon_write_in_t, rpc_data_out_t, diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index f72fca744..cc1cb7344 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -132,6 +132,8 @@ register_client_rpcs(margo_instance_id mid) { PROXY_DATA->rpc_client_ids().rpc_read_id = MARGO_REGISTER(mid, gkfs::rpc::tag::proxy_daemon_read, rpc_proxy_daemon_read_in_t, rpc_data_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_truncate_id = MARGO_REGISTER( + mid, gkfs::rpc::tag::truncate, rpc_trunc_in_t, rpc_err_out_t, NULL); PROXY_DATA->rpc_client_ids().rpc_chunk_stat_id = MARGO_REGISTER(mid, gkfs::rpc::tag::get_chunk_stat, rpc_chunk_stat_in_t, rpc_chunk_stat_out_t, NULL); diff --git a/src/proxy/rpc/forward_data.cpp b/src/proxy/rpc/forward_data.cpp index dac2e5778..d24470e87 100644 --- a/src/proxy/rpc/forward_data.cpp +++ b/src/proxy/rpc/forward_data.cpp @@ -18,6 +18,7 @@ 
#include #include +#include using namespace std; @@ -343,6 +344,108 @@ forward_read(const std::string& path, void* buf, const int64_t offset, return ::make_pair(err, out_size); } +int +forward_truncate(const std::string& path, size_t current_size, + size_t new_size) { + + rpc_trunc_in_t daemon_in{}; + rpc_err_out_t daemon_out{}; + hg_return_t ret{}; + bool err = false; + // fill in + daemon_in.path = path.c_str(); + daemon_in.length = new_size; + + // import pow2-optimized arithmetic functions + using namespace gkfs::utils::arithmetic; + + // Find out which data servers need to delete data chunks in order to + // contact only them + const unsigned int chunk_start = + block_index(new_size, gkfs::config::rpc::chunksize); + const unsigned int chunk_end = block_index(current_size - new_size - 1, + gkfs::config::rpc::chunksize); + + std::unordered_set hosts; + for(unsigned int chunk_id = chunk_start; chunk_id <= chunk_end; + ++chunk_id) { + hosts.insert(PROXY_DATA->distributor()->locate_data(path, chunk_id, 0)); + } + // some helper variables for async RPC + vector rpc_handles(hosts.size()); + vector rpc_waiters(hosts.size()); + unsigned int req_num = 0; + // Issue non-blocking RPC requests and wait for the result later + for(const auto& host : hosts) { + + ret = margo_create(PROXY_DATA->client_rpc_mid(), + PROXY_DATA->rpc_endpoints().at(host), + PROXY_DATA->rpc_client_ids().rpc_truncate_id, + &rpc_handles[req_num]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to create Mercury handle for host: ", __func__, + host); + break; + } + // Send RPC + ret = margo_iforward(rpc_handles[req_num], &daemon_in, + &rpc_waiters[req_num]); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error( + "{}() Unable to send non-blocking rpc for path {} and recipient {}", + __func__, path, host); + break; + } + req_num++; + } + if(req_num < hosts.size()) { + // An error occurred. 
Cleanup and return + PROXY_DATA->log()->error( + "{}() Error -> sent only some requests {}/{}. Cancelling request...", + __func__, req_num, hosts.size()); + for(unsigned int i = 0; i < req_num; ++i) { + margo_destroy(rpc_handles[i]); + } + // TODO Ideally wait for dangling responses + return EIO; + } + // Wait for RPC responses and then get response + for(unsigned int i = 0; i < hosts.size(); ++i) { + ret = margo_wait(rpc_waiters[i]); + if(ret == HG_SUCCESS) { + ret = margo_get_output(rpc_handles[i], &daemon_out); + if(ret == HG_SUCCESS) { + if(daemon_out.err) { + PROXY_DATA->log()->error("{}() received error response: {}", + __func__, daemon_out.err); + err = true; + } + } else { + // Get output failed + PROXY_DATA->log()->error("{}() while getting rpc output", + __func__); + err = true; + } + } else { + // Wait failed + PROXY_DATA->log()->error("{}() Failed while waiting for response", + __func__); + err = true; + } + + /* clean up resources consumed by this rpc */ + margo_free_output(rpc_handles[i], &daemon_out); + margo_destroy(rpc_handles[i]); + } + + if(err) { + errno = EBUSY; + return -1; + } + return 0; +} + pair forward_get_chunk_stat() { int err = 0; diff --git a/src/proxy/rpc/srv_data.cpp b/src/proxy/rpc/srv_data.cpp index 65078ceb2..9704825ce 100644 --- a/src/proxy/rpc/srv_data.cpp +++ b/src/proxy/rpc/srv_data.cpp @@ -200,9 +200,9 @@ proxy_rpc_srv_truncate(hg_handle_t handle) { PROXY_DATA->log()->debug( "{}() Got RPC with path '{}' current_size '{}' length '{}'", __func__, client_in.path, client_in.current_size, client_in.length); - client_out.err = EIO; - // client_out.err = gkfs::rpc::forward_create(client_in.path, - // client_in.mode); + + client_out.err = gkfs::rpc::forward_truncate( + client_in.path, client_in.current_size, client_in.length); PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, client_out.err); @@ -211,92 +211,6 @@ proxy_rpc_srv_truncate(hg_handle_t handle) { DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_truncate) - -// int 
trunc_data(const std::string& path, size_t current_size, size_t new_size) -// { -// assert(current_size > new_size); -// hg_return_t ret; -// rpc_trunc_in_t in; -// in.path = path.c_str(); -// in.length = new_size; -// -// bool error = false; -// -// // Find out which data server needs to delete chunks in order to contact -// only them const unsigned int chunk_start = chnk_id_for_offset(new_size, -// CHUNKSIZE); const unsigned int chunk_end = -// chnk_id_for_offset(current_size - new_size - 1, CHUNKSIZE); -// std::unordered_set hosts; -// for(unsigned int chunk_id = chunk_start; chunk_id <= chunk_end; -// ++chunk_id) { -// hosts.insert(CTX->distributor()->locate_data(path, chunk_id)); -// } -// -// std::vector rpc_handles(hosts.size()); -// std::vector rpc_waiters(hosts.size()); -// unsigned int req_num = 0; -// for (const auto& host: hosts) { -// ret = margo_create_wrap_helper(rpc_trunc_data_id, host, -// rpc_handles[req_num]); if (ret != HG_SUCCESS) { -// CTX->log()->error("{}() Unable to create Mercury handle for host: -// ", __func__, host); break; -// } -// -// // send async rpc -// ret = margo_iforward(rpc_handles[req_num], &in, -// &rpc_waiters[req_num]); if (ret != HG_SUCCESS) { -// CTX->log()->error("{}() Failed to send request to host: {}", -// __func__, host); break; -// } -// ++req_num; -// } -// -// if(req_num < hosts.size()) { -// // An error occurred. Cleanup and return -// CTX->log()->error("{}() Error -> sent only some requests {}/{}. 
-// Cancelling request...", __func__, req_num, hosts.size()); -// for(unsigned int i = 0; i < req_num; ++i) { -// margo_destroy(rpc_handles[i]); -// } -// errno = EIO; -// return -1; -// } -// -// assert(req_num == hosts.size()); -// // Wait for RPC responses and then get response -// rpc_err_out_t out{}; -// for (unsigned int i = 0; i < hosts.size(); ++i) { -// ret = margo_wait(rpc_waiters[i]); -// if (ret == HG_SUCCESS) { -// ret = margo_get_output(rpc_handles[i], &out); -// if (ret == HG_SUCCESS) { -// if(out.err){ -// CTX->log()->error("{}() received error response: {}", -// __func__, out.err); error = true; -// } -// } else { -// // Get output failed -// CTX->log()->error("{}() while getting rpc output", __func__); -// error = true; -// } -// } else { -// // Wait failed -// CTX->log()->error("{}() Failed while waiting for response", -// __func__); error = true; -// } -// -// /* clean up resources consumed by this rpc */ -// margo_free_output(rpc_handles[i], &out); -// margo_destroy(rpc_handles[i]); -// } -// -// if(error) { -// errno = EIO; -// return -1; -// } -// return 0; -// } - static hg_return_t proxy_rpc_srv_chunk_stat(hg_handle_t handle) { rpc_chunk_stat_in_t client_in{}; -- GitLab From 296b8d9ce7af2e4b33e2d7e08a35280de070e814 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 22 Apr 2024 16:18:05 +0200 Subject: [PATCH 04/24] Added Proxy decr_size support (truncate) for Client <-> Proxy --- include/client/rpc/forward_metadata_proxy.hpp | 3 + include/client/rpc/rpc_types.hpp | 121 +++++++++++++++++- include/common/common_defs.hpp | 1 + include/proxy/rpc/forward_metadata.hpp | 3 + include/proxy/rpc/rpc_defs.hpp | 2 + src/client/gkfs_functions.cpp | 24 ++-- src/client/rpc/forward_metadata_proxy.cpp | 25 ++++ src/client/rpc/rpc_types.cpp | 2 + src/proxy/proxy.cpp | 2 + src/proxy/rpc/forward_metadata.cpp | 5 + src/proxy/rpc/srv_metadata.cpp | 23 ++++ 11 files changed, 201 insertions(+), 10 deletions(-) diff --git 
a/include/client/rpc/forward_metadata_proxy.hpp b/include/client/rpc/forward_metadata_proxy.hpp index 57c34afb3..b8a58c8a3 100644 --- a/include/client/rpc/forward_metadata_proxy.hpp +++ b/include/client/rpc/forward_metadata_proxy.hpp @@ -25,6 +25,9 @@ forward_stat_proxy(const std::string& path, std::string& attr); int forward_remove_proxy(const std::string& path); +int +forward_decr_size_proxy(const std::string& path, size_t length); + std::pair forward_update_metadentry_size_proxy(const std::string& path, const size_t size, const off64_t offset, diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index a34ed9496..496241fb3 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -3182,6 +3182,123 @@ struct remove_proxy { }; }; +//============================================================================== +// definitions for decr_size_proxy +struct decr_size_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = decr_size_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_trunc_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 27; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_decr_size; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_trunc_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, uint64_t length) + : m_path(path), m_length(length) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + length() const { + return m_length; + } + + explicit input(const rpc_trunc_in_t& other) + : m_path(other.path), m_length(other.length) {} + + explicit + operator rpc_trunc_in_t() { + return {m_path.c_str(), m_length}; + } + + private: + std::string m_path; + uint64_t m_length; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err() {} + + output(int32_t err) : m_err(err) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + //============================================================================== // definitions for update_metadentry_size struct update_metadentry_size_proxy { @@ -3202,7 +3319,7 @@ struct update_metadentry_size_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 27; + 
constexpr static const uint64_t public_id = 28; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -3341,7 +3458,7 @@ struct get_dirents_extended_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 28; + constexpr static const uint64_t public_id = 29; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; diff --git a/include/common/common_defs.hpp b/include/common/common_defs.hpp index 9b6c79d28..cd1aad3c3 100644 --- a/include/common/common_defs.hpp +++ b/include/common/common_defs.hpp @@ -65,6 +65,7 @@ constexpr auto get_chunk_stat = "rpc_srv_chunk_stat"; constexpr auto client_proxy_create = "proxy_rpc_srv_create"; constexpr auto client_proxy_stat = "proxy_rpc_srv_stat"; constexpr auto client_proxy_remove = "proxy_rpc_srv_remove"; +constexpr auto client_proxy_decr_size = "proxy_rpc_srv_decr_size"; constexpr auto client_proxy_update_size = "proxy_rpc_srv_update_metadentry_size"; constexpr auto client_proxy_write = "proxy_rpc_srv_write_data"; diff --git a/include/proxy/rpc/forward_metadata.hpp b/include/proxy/rpc/forward_metadata.hpp index 77fe00f1e..0ba2fbdc3 100644 --- a/include/proxy/rpc/forward_metadata.hpp +++ b/include/proxy/rpc/forward_metadata.hpp @@ -27,6 +27,9 @@ forward_stat(const std::string& path); int forward_remove(const std::string& path); +int +forward_decr_size(const std::string& path, size_t length); + std::pair forward_update_metadentry_size(const std::string& path, const size_t size, const off64_t offset, const bool append_flag); diff --git a/include/proxy/rpc/rpc_defs.hpp b/include/proxy/rpc/rpc_defs.hpp index 36721d023..20718e635 100644 --- a/include/proxy/rpc/rpc_defs.hpp +++ b/include/proxy/rpc/rpc_defs.hpp @@ -27,6 +27,8 @@ DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_stat) DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_remove) 
+DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_decr_size) + DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_update_metadentry_size) DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index e1ea22e83..c5befd250 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -759,16 +759,24 @@ gkfs_truncate(const std::string& path, off_t old_size, off_t new_size) { if(new_size == old_size) { return 0; } - for(auto copy = 0; copy < (CTX->get_replicas() + 1); copy++) { - auto err = gkfs::rpc::forward_decr_size(path, new_size, copy); - if(err) { - LOG(DEBUG, "Failed to decrease size"); - errno = err; - return -1; + int err = 0; + // decrease size on metadata server first + if(gkfs::config::proxy::fwd_truncate && CTX->use_proxy()) { + err = gkfs::rpc::forward_decr_size_proxy(path, new_size); + } else { + for(auto copy = 0; copy < (CTX->get_replicas() + 1); copy++) { + err = gkfs::rpc::forward_decr_size(path, new_size, copy); + if(err) { + break; + } } } - - int err = 0; + if(err) { + LOG(DEBUG, "Failed to decrease size"); + errno = err; + return -1; + } + // truncate chunks to new_size next if(gkfs::config::proxy::fwd_truncate && CTX->use_proxy()) { err = gkfs::rpc::forward_truncate_proxy(path, old_size, new_size); } else { diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index 2271ad943..a3a51f540 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -98,6 +98,31 @@ forward_remove_proxy(const std::string& path) { } } +int +forward_decr_size_proxy(const std::string& path, size_t length) { + auto endp = CTX->proxy_host(); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a 
broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_proxy_service + ->post(endp, path, length) + .get() + .at(0); + LOG(DEBUG, "Got response success: {}", out.err()); + + return out.err() ? out.err() : 0; + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return EBUSY; + } +} + pair forward_update_metadentry_size_proxy(const string& path, const size_t size, const off64_t offset, diff --git a/src/client/rpc/rpc_types.cpp b/src/client/rpc/rpc_types.cpp index d97e2c6df..7560e7cbc 100644 --- a/src/client/rpc/rpc_types.cpp +++ b/src/client/rpc/rpc_types.cpp @@ -72,6 +72,8 @@ hermes::detail::register_user_request_types(uint32_t provider_id) { (void) registered_requests().add(provider_id); (void) registered_requests().add(provider_id); (void) registered_requests().add(provider_id); + (void) registered_requests().add( + provider_id); (void) registered_requests() .add(provider_id); (void) registered_requests().add( diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index cc1cb7344..04b249508 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -57,6 +57,8 @@ register_server_ipcs(margo_instance_id mid) { rpc_stat_out_t, proxy_rpc_srv_stat) MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_remove, rpc_rm_node_in_t, rpc_err_out_t, proxy_rpc_srv_remove) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_decr_size, rpc_trunc_in_t, + rpc_err_out_t, proxy_rpc_srv_decr_size) MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_update_size, rpc_update_metadentry_size_in_t, rpc_update_metadentry_size_out_t, diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp index abd34f4e9..ae88d38e6 100644 --- a/src/proxy/rpc/forward_metadata.cpp +++ b/src/proxy/rpc/forward_metadata.cpp @@ -246,6 +246,11 @@ forward_remove(const std::string& path) { return remove_data(path); } +int +forward_decr_size(const std::string& path, size_t length) { + return EIO; +} + pair 
forward_update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag) { diff --git a/src/proxy/rpc/srv_metadata.cpp b/src/proxy/rpc/srv_metadata.cpp index 6c6277a5b..c4d41b627 100644 --- a/src/proxy/rpc/srv_metadata.cpp +++ b/src/proxy/rpc/srv_metadata.cpp @@ -88,6 +88,29 @@ proxy_rpc_srv_remove(hg_handle_t handle) { DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_remove) +static hg_return_t +proxy_rpc_srv_decr_size(hg_handle_t handle) { + rpc_trunc_in_t client_in{}; + rpc_err_out_t client_out{}; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() Got RPC with path '{}' length '{}'", + __func__, client_in.path, client_in.length); + client_out.err = + gkfs::rpc::forward_decr_size(client_in.path, client_in.length); + + PROXY_DATA->log()->debug("{}() Sending output err '{}'", __func__, + client_out.err); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_decr_size) + static hg_return_t proxy_rpc_srv_update_metadentry_size(hg_handle_t handle) { -- GitLab From c1ccf9aad4839a920e51ab4acbc49fd389431ed0 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 22 Apr 2024 16:27:16 +0200 Subject: [PATCH 05/24] Added Proxy decr_size support (truncate) for Proxy <-> Daemon --- include/proxy/proxy_data.hpp | 1 + src/daemon/daemon.cpp | 2 ++ src/proxy/proxy.cpp | 3 +++ src/proxy/rpc/forward_metadata.cpp | 43 +++++++++++++++++++++++++++++- 4 files changed, 48 insertions(+), 1 deletion(-) diff --git a/include/proxy/proxy_data.hpp b/include/proxy/proxy_data.hpp index 3b7fbc270..4e7636fcf 100644 --- a/include/proxy/proxy_data.hpp +++ b/include/proxy/proxy_data.hpp @@ -28,6 +28,7 @@ struct margo_client_ids { hg_id_t rpc_create_id; hg_id_t rpc_stat_id; hg_id_t rpc_remove_id; 
+ hg_id_t rpc_decr_size_id; hg_id_t rpc_remove_data_id; hg_id_t rpc_update_metadentry_size_id; hg_id_t rpc_write_id; diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 5a55ab0a5..b3985f4fe 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -249,6 +249,8 @@ register_proxy_server_rpcs(margo_instance_id mid) { rpc_stat_out_t, rpc_srv_stat); MARGO_REGISTER(mid, gkfs::rpc::tag::remove_metadata, rpc_rm_node_in_t, rpc_rm_metadata_out_t, rpc_srv_remove_metadata); + MARGO_REGISTER(mid, gkfs::rpc::tag::decr_size, rpc_trunc_in_t, + rpc_err_out_t, rpc_srv_decr_size); MARGO_REGISTER(mid, gkfs::rpc::tag::remove_data, rpc_rm_node_in_t, rpc_err_out_t, rpc_srv_remove_data); MARGO_REGISTER(mid, gkfs::rpc::tag::update_metadentry_size, diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index 04b249508..633c481cd 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -147,6 +147,9 @@ register_client_rpcs(margo_instance_id mid) { PROXY_DATA->rpc_client_ids().rpc_remove_id = MARGO_REGISTER(mid, gkfs::rpc::tag::remove_metadata, rpc_rm_node_in_t, rpc_rm_metadata_out_t, NULL); + PROXY_DATA->rpc_client_ids().rpc_decr_size_id = + MARGO_REGISTER(mid, gkfs::rpc::tag::decr_size, rpc_trunc_in_t, + rpc_err_out_t, NULL); PROXY_DATA->rpc_client_ids().rpc_remove_data_id = MARGO_REGISTER(mid, gkfs::rpc::tag::remove_data, rpc_rm_node_in_t, rpc_err_out_t, NULL); diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp index ae88d38e6..d4653f4b6 100644 --- a/src/proxy/rpc/forward_metadata.cpp +++ b/src/proxy/rpc/forward_metadata.cpp @@ -248,7 +248,48 @@ forward_remove(const std::string& path) { int forward_decr_size(const std::string& path, size_t length) { - return EIO; + hg_handle_t rpc_handle = nullptr; + rpc_trunc_in_t daemon_in{}; + rpc_err_out_t daemon_out{}; + int err = 0; + // fill in + daemon_in.path = path.c_str(); + daemon_in.length = length; + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", 
__func__); + auto endp = PROXY_DATA->rpc_endpoints().at( + PROXY_DATA->distributor()->locate_file_metadata(path, 0)); + auto ret = margo_create(PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_decr_size_id, + &rpc_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Critical error", __func__); + return EBUSY; + } + ret = margo_forward(rpc_handle, &daemon_in); + if(ret == HG_SUCCESS) { + // Get response + PROXY_DATA->log()->trace("{}() Waiting for response", __func__); + ret = margo_get_output(rpc_handle, &daemon_out); + if(ret == HG_SUCCESS) { + PROXY_DATA->log()->debug("{}() Got response success: {}", __func__, + daemon_out.err); + err = daemon_out.err; + margo_free_output(rpc_handle, &daemon_out); + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() while getting rpc output", __func__); + } + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() sending rpc failed", __func__); + } + + /* clean up resources consumed by this rpc */ + margo_destroy(rpc_handle); + return err; } pair -- GitLab From a57cdc225c21bd49d37afff0ad3db4f3527e9a5d Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 22 Apr 2024 18:10:14 +0200 Subject: [PATCH 06/24] Added Proxy lseek() support for Client <-> Proxy --- include/client/rpc/forward_metadata_proxy.hpp | 3 + include/client/rpc/rpc_types.hpp | 121 +++++++++++++++++- include/common/common_defs.hpp | 1 + include/config.hpp | 1 + include/proxy/rpc/forward_metadata.hpp | 3 + include/proxy/rpc/rpc_defs.hpp | 2 + src/client/gkfs_functions.cpp | 13 +- src/client/rpc/forward_data.cpp | 6 +- src/client/rpc/forward_metadata.cpp | 6 +- src/client/rpc/forward_metadata_proxy.cpp | 29 +++++ src/client/rpc/rpc_types.cpp | 2 + src/daemon/handler/srv_metadata.cpp | 4 +- src/proxy/proxy.cpp | 3 + src/proxy/rpc/forward_metadata.cpp | 6 + src/proxy/rpc/srv_metadata.cpp | 32 +++++ 15 files changed, 219 insertions(+), 13 deletions(-) diff --git 
a/include/client/rpc/forward_metadata_proxy.hpp b/include/client/rpc/forward_metadata_proxy.hpp index b8a58c8a3..9d5e37ef0 100644 --- a/include/client/rpc/forward_metadata_proxy.hpp +++ b/include/client/rpc/forward_metadata_proxy.hpp @@ -33,6 +33,9 @@ forward_update_metadentry_size_proxy(const std::string& path, const size_t size, const off64_t offset, const bool append_flag); +std::pair +forward_get_metadentry_size_proxy(const std::string& path); + std::pair>> forward_get_dirents_single_proxy(const std::string& path, int server); diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index 496241fb3..738b99483 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -3299,6 +3299,123 @@ struct decr_size_proxy { }; }; +//============================================================================== +// definitions for get_metadentry_size_proxy +struct get_metadentry_size_proxy { + + // forward declarations of public input/output types for this RPC + class input; + + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = get_metadentry_size_proxy; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_path_only_in_t; + using mercury_output_type = rpc_get_metadentry_size_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 28; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = 0; + + // RPC name + constexpr static const auto name = gkfs::rpc::tag::client_proxy_get_size; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_path_only_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_get_metadentry_size_out_t); + + class input { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path) : m_path(path) {} + + input(input&& rhs) = default; + + input(const input& other) = default; + + input& + operator=(input&& rhs) = default; + + input& + operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit input(const rpc_path_only_in_t& other) : m_path(other.path) {} + + explicit + operator rpc_path_only_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t + hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : m_err(), m_ret_size() {} + + output(int32_t err, int64_t ret_size) + : m_err(err), m_ret_size(ret_size) {} + + output(output&& rhs) = default; + + output(const output& other) = default; + + output& + operator=(output&& rhs) = default; + + output& + operator=(const output& other) = default; + + explicit output(const rpc_get_metadentry_size_out_t& out) { + m_err = out.err; + m_ret_size = out.ret_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + ret_size() const { + return m_ret_size; + } + + private: + int32_t m_err; + int64_t m_ret_size; + }; +}; + //============================================================================== // definitions for update_metadentry_size struct update_metadentry_size_proxy { @@ -3319,7 +3436,7 @@ struct update_metadentry_size_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) 
- constexpr static const uint64_t public_id = 28; + constexpr static const uint64_t public_id = 29; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; @@ -3458,7 +3575,7 @@ struct get_dirents_extended_proxy { // RPC public identifier // (N.B: we reuse the same IDs assigned by Margo so that the daemon // understands Hermes RPCs) - constexpr static const uint64_t public_id = 29; + constexpr static const uint64_t public_id = 30; // RPC internal Mercury identifier constexpr static const hg_id_t mercury_id = 0; diff --git a/include/common/common_defs.hpp b/include/common/common_defs.hpp index cd1aad3c3..f9768d809 100644 --- a/include/common/common_defs.hpp +++ b/include/common/common_defs.hpp @@ -66,6 +66,7 @@ constexpr auto client_proxy_create = "proxy_rpc_srv_create"; constexpr auto client_proxy_stat = "proxy_rpc_srv_stat"; constexpr auto client_proxy_remove = "proxy_rpc_srv_remove"; constexpr auto client_proxy_decr_size = "proxy_rpc_srv_decr_size"; +constexpr auto client_proxy_get_size = "proxy_rpc_srv_get_metadentry_size"; constexpr auto client_proxy_update_size = "proxy_rpc_srv_update_metadentry_size"; constexpr auto client_proxy_write = "proxy_rpc_srv_write_data"; diff --git a/include/config.hpp b/include/config.hpp index 0f3d2a0e1..49b71be38 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -122,6 +122,7 @@ constexpr auto pid_path = "/tmp/gkfs_proxy.pid"; constexpr auto fwd_create = true; constexpr auto fwd_stat = true; constexpr auto fwd_remove = true; +constexpr auto fwd_get_size = true; constexpr auto fwd_update_size = true; constexpr auto fwd_io = true; constexpr auto fwd_truncate = true; diff --git a/include/proxy/rpc/forward_metadata.hpp b/include/proxy/rpc/forward_metadata.hpp index 0ba2fbdc3..651f847ec 100644 --- a/include/proxy/rpc/forward_metadata.hpp +++ b/include/proxy/rpc/forward_metadata.hpp @@ -30,6 +30,9 @@ forward_remove(const std::string& path); int forward_decr_size(const std::string& path, size_t 
length); +std::pair +forward_get_metadentry_size(const std::string& path); + std::pair forward_update_metadentry_size(const std::string& path, const size_t size, const off64_t offset, const bool append_flag); diff --git a/include/proxy/rpc/rpc_defs.hpp b/include/proxy/rpc/rpc_defs.hpp index 20718e635..da3d652ce 100644 --- a/include/proxy/rpc/rpc_defs.hpp +++ b/include/proxy/rpc/rpc_defs.hpp @@ -29,6 +29,8 @@ DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_remove) DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_decr_size) +DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_get_metadentry_size) + DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_update_metadentry_size) DECLARE_MARGO_RPC_HANDLER(proxy_rpc_srv_read) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index c5befd250..e452ef2f9 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -708,9 +708,16 @@ gkfs_lseek(shared_ptr gkfs_fd, off_t offset, gkfs_fd->pos(gkfs_fd->pos() + offset); break; case SEEK_END: { - // TODO: handle replicas - auto ret = - gkfs::rpc::forward_get_metadentry_size(gkfs_fd->path(), 0); + std::pair ret{}; + if(gkfs::config::proxy::fwd_get_size && CTX->use_proxy()) { + ret = gkfs::rpc::forward_get_metadentry_size_proxy( + gkfs_fd->path()); + } else { + // TODO: handle replicas + ret = gkfs::rpc::forward_get_metadentry_size(gkfs_fd->path(), + 0); + } + auto err = ret.first; if(err) { errno = err; diff --git a/src/client/rpc/forward_data.cpp b/src/client/rpc/forward_data.cpp index 27c78d2b6..2e558683b 100644 --- a/src/client/rpc/forward_data.cpp +++ b/src/client/rpc/forward_data.cpp @@ -516,9 +516,9 @@ int forward_truncate(const std::string& path, size_t current_size, size_t new_size, const int8_t num_copies) { - if(CTX->use_proxy()) { - LOG(WARNING, "{} is run due to missing proxy implementation!", - __func__); + if(gkfs::config::proxy::fwd_truncate && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__, 
gkfs::config::proxy::fwd_truncate); } // import pow2-optimized arithmetic functions diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index e1054740e..424f91d52 100644 --- a/src/client/rpc/forward_metadata.cpp +++ b/src/client/rpc/forward_metadata.cpp @@ -293,9 +293,9 @@ forward_remove(const std::string& path, const int8_t num_copies) { */ int forward_decr_size(const std::string& path, size_t length, const int copy) { - if(CTX->use_proxy()) { - LOG(WARNING, "{} is run due to missing proxy implementation!", - __func__); + if(gkfs::config::proxy::fwd_truncate && CTX->use_proxy()) { + LOG(WARNING, "{} was called even though proxy should be used!", + __func__, gkfs::config::proxy::fwd_truncate); } auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path, copy)); diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index a3a51f540..6198d6579 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -155,6 +155,35 @@ forward_update_metadentry_size_proxy(const string& path, const size_t size, } } +pair +forward_get_metadentry_size_proxy(const std::string& path) { + auto endp = CTX->proxy_host(); + + try { + LOG(DEBUG, "Sending RPC ..."); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we + // can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_proxy_service + ->post(endp, path) + .get() + .at(0); + + LOG(DEBUG, "Got response success: {}", out.err()); + + if(out.err()) + return make_pair(out.err(), 0); + else + return make_pair(0, out.ret_size()); + } catch(const std::exception& ex) { + LOG(ERROR, "while getting rpc output"); + return make_pair(EBUSY, 0); + } +} + pair>> forward_get_dirents_single_proxy(const string& path, int server) { diff --git a/src/client/rpc/rpc_types.cpp b/src/client/rpc/rpc_types.cpp index 7560e7cbc..8dc03c911 100644 --- a/src/client/rpc/rpc_types.cpp +++ b/src/client/rpc/rpc_types.cpp @@ -74,6 +74,8 @@ hermes::detail::register_user_request_types(uint32_t provider_id) { (void) registered_requests().add(provider_id); (void) registered_requests().add( provider_id); + (void) registered_requests().add( + provider_id); (void) registered_requests() .add(provider_id); (void) registered_requests().add( diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index f766a4e65..7dbdf2fa5 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -489,8 +489,8 @@ rpc_srv_get_metadentry_size(hg_handle_t handle) { out.err = EBUSY; } - GKFS_DATA->spdlogger()->debug("{}() Sending output '{}'", __func__, - out.err); + GKFS_DATA->spdlogger()->debug("{}() Sending output err '{}' ret_size '{}'", + __func__, out.err, out.ret_size); auto hret = margo_respond(handle, &out); if(hret != HG_SUCCESS) { GKFS_DATA->spdlogger()->error("{}() Failed to respond", __func__); diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index 633c481cd..67b676abc 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -59,6 +59,9 @@ register_server_ipcs(margo_instance_id mid) { rpc_err_out_t, proxy_rpc_srv_remove) MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_decr_size, rpc_trunc_in_t, rpc_err_out_t, proxy_rpc_srv_decr_size) + MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_get_size, + 
rpc_path_only_in_t, rpc_get_metadentry_size_out_t, + proxy_rpc_srv_get_metadentry_size) MARGO_REGISTER(mid, gkfs::rpc::tag::client_proxy_update_size, rpc_update_metadentry_size_in_t, rpc_update_metadentry_size_out_t, diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp index d4653f4b6..b99671e60 100644 --- a/src/proxy/rpc/forward_metadata.cpp +++ b/src/proxy/rpc/forward_metadata.cpp @@ -292,6 +292,12 @@ forward_decr_size(const std::string& path, size_t length) { return err; } +pair +forward_get_metadentry_size(const string& path) { + // return default + return make_pair(0, 0); +} + pair forward_update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag) { diff --git a/src/proxy/rpc/srv_metadata.cpp b/src/proxy/rpc/srv_metadata.cpp index c4d41b627..5f577f0b7 100644 --- a/src/proxy/rpc/srv_metadata.cpp +++ b/src/proxy/rpc/srv_metadata.cpp @@ -111,6 +111,38 @@ proxy_rpc_srv_decr_size(hg_handle_t handle) { DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_decr_size) +static hg_return_t +proxy_rpc_srv_get_metadentry_size(hg_handle_t handle) { + + rpc_path_only_in_t client_in{}; + rpc_get_metadentry_size_out_t client_out{}; + + auto ret = margo_get_input(handle, &client_in); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Failed to retrieve input from handle", + __func__); + return gkfs::rpc::cleanup_respond(&handle, &client_in, &client_out); + } + PROXY_DATA->log()->debug("{}() path: '{}'", __func__, client_in.path); + + try { + auto [err, ret_size] = + gkfs::rpc::forward_get_metadentry_size(client_in.path); + client_out.err = 0; + client_out.ret_size = ret_size; + } catch(const std::exception& e) { + PROXY_DATA->log()->error("{}() Failed to get metadentry size RPC: '{}'", + __func__, e.what()); + client_out.err = EBUSY; + } + + PROXY_DATA->log()->debug("{}() Sending output err '{}' ret_size '{}'", + __func__, client_out.err, client_out.ret_size); + return gkfs::rpc::cleanup_respond(&handle, 
&client_in, &client_out); +} + +DEFINE_MARGO_RPC_HANDLER(proxy_rpc_srv_get_metadentry_size) + static hg_return_t proxy_rpc_srv_update_metadentry_size(hg_handle_t handle) { -- GitLab From 0c9402441b3f537cc6e6f461ad03e488b3847a5b Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 22 Apr 2024 18:24:37 +0200 Subject: [PATCH 07/24] Added Proxy lseek() support for Proxy <-> Daemon --- include/proxy/proxy_data.hpp | 1 + src/daemon/daemon.cpp | 2 ++ src/proxy/proxy.cpp | 3 ++ src/proxy/rpc/forward_metadata.cpp | 48 ++++++++++++++++++++++++++++-- 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/include/proxy/proxy_data.hpp b/include/proxy/proxy_data.hpp index 4e7636fcf..42d829f08 100644 --- a/include/proxy/proxy_data.hpp +++ b/include/proxy/proxy_data.hpp @@ -30,6 +30,7 @@ struct margo_client_ids { hg_id_t rpc_remove_id; hg_id_t rpc_decr_size_id; hg_id_t rpc_remove_data_id; + hg_id_t rpc_get_metadentry_size_id; hg_id_t rpc_update_metadentry_size_id; hg_id_t rpc_write_id; hg_id_t rpc_read_id; diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index b3985f4fe..6cc36ab88 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -253,6 +253,8 @@ register_proxy_server_rpcs(margo_instance_id mid) { rpc_err_out_t, rpc_srv_decr_size); MARGO_REGISTER(mid, gkfs::rpc::tag::remove_data, rpc_rm_node_in_t, rpc_err_out_t, rpc_srv_remove_data); + MARGO_REGISTER(mid, gkfs::rpc::tag::get_metadentry_size, rpc_path_only_in_t, + rpc_get_metadentry_size_out_t, rpc_srv_get_metadentry_size); MARGO_REGISTER(mid, gkfs::rpc::tag::update_metadentry_size, rpc_update_metadentry_size_in_t, rpc_update_metadentry_size_out_t, diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index 67b676abc..77e17ea12 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -156,6 +156,9 @@ register_client_rpcs(margo_instance_id mid) { PROXY_DATA->rpc_client_ids().rpc_remove_data_id = MARGO_REGISTER(mid, gkfs::rpc::tag::remove_data, rpc_rm_node_in_t, rpc_err_out_t, NULL); + 
PROXY_DATA->rpc_client_ids().rpc_get_metadentry_size_id = MARGO_REGISTER( + mid, gkfs::rpc::tag::get_metadentry_size, rpc_path_only_in_t, + rpc_get_metadentry_size_out_t, NULL); PROXY_DATA->rpc_client_ids().rpc_update_metadentry_size_id = MARGO_REGISTER(mid, gkfs::rpc::tag::update_metadentry_size, rpc_update_metadentry_size_in_t, diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp index b99671e60..2a42d47a6 100644 --- a/src/proxy/rpc/forward_metadata.cpp +++ b/src/proxy/rpc/forward_metadata.cpp @@ -294,8 +294,52 @@ forward_decr_size(const std::string& path, size_t length) { pair forward_get_metadentry_size(const string& path) { - // return default - return make_pair(0, 0); + hg_handle_t rpc_handle = nullptr; + rpc_path_only_in_t daemon_in{}; + rpc_get_metadentry_size_out_t daemon_out{}; + int err = 0; + off64_t ret_offset = 0; + // fill in + daemon_in.path = path.c_str(); + // Create handle + PROXY_DATA->log()->debug("{}() Creating Margo handle ...", __func__); + auto endp = PROXY_DATA->rpc_endpoints().at( + PROXY_DATA->distributor()->locate_file_metadata(path, 0)); + auto ret = margo_create( + PROXY_DATA->client_rpc_mid(), endp, + PROXY_DATA->rpc_client_ids().rpc_get_metadentry_size_id, + &rpc_handle); + if(ret != HG_SUCCESS) { + PROXY_DATA->log()->error("{}() Critical error", __func__); + return make_pair(EBUSY, 0); + ; + } + ret = margo_forward(rpc_handle, &daemon_in); + if(ret == HG_SUCCESS) { + // Get response + PROXY_DATA->log()->trace("{}() Waiting for response", __func__); + ret = margo_get_output(rpc_handle, &daemon_out); + if(ret == HG_SUCCESS) { + PROXY_DATA->log()->debug( + "{}() Got response success err '{}' ret_size '{}'", + __func__, daemon_out.err, daemon_out.ret_size); + err = daemon_out.err; + ret_offset = daemon_out.ret_size; + margo_free_output(rpc_handle, &daemon_out); + } else { + // something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() while getting rpc output", __func__); + } + } else { + // 
something is wrong + err = EBUSY; + PROXY_DATA->log()->error("{}() sending rpc failed", __func__); + } + + /* clean up resources consumed by this rpc */ + margo_destroy(rpc_handle); + return make_pair(err, ret_offset); } pair -- GitLab From e7c3b0c2e41136145bf3a124bb14df2c936eb465 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Tue, 23 Apr 2024 17:29:26 +0200 Subject: [PATCH 08/24] Added full proxy support to gkfs runner script --- scripts/run/gkfs | 86 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 29 deletions(-) diff --git a/scripts/run/gkfs b/scripts/run/gkfs index 8e93a3b86..993940f9c 100755 --- a/scripts/run/gkfs +++ b/scripts/run/gkfs @@ -49,15 +49,17 @@ wait_for_gkfs_daemons() { # If valid, an additional line is added. Otherwise, the pid in the file is deleted. # Globals: # SRUN_DAEMON_PID_FILE +# SRUN_PROXY_PID_FILE # VERBOSE # Arguments: +# path to pid file # pid to write to pid file # Outputs: # Writes status to stdout if VERBOSE is true ####################################### create_pid_file() { - local pid_file=${SRUN_DAEMON_PID_FILE} - local pid=${1} + local pid_file=${1} + local pid=${2} if [[ ${VERBOSE} == true ]]; then echo -e "${C_AST_GREEN}Creating pid file at ${pid_file} with pid ${pid} ..." fi @@ -174,35 +176,35 @@ start_daemon() { echo -e "${C_AST_GREEN}GekkoFS daemons running" echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds" - #if [[ ${USE_PROXY} == true ]]; then - # echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM} nodes) ..." - # start_time="$(date -u +%s.%3N)" - # ${proxy_execute} & - # local proxy_pid=$! - # sleep 5 # TODO - # stop_time="$(date -u +%s.%3N)" - # elapsed="$(bc <<<"$stop_time-$start_time")" - # echo -e "${C_AST_GREEN}GekkoFS daemons probably :) running" - # echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds" - #fi + if [[ ${USE_PROXY} == true ]]; then + echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM} nodes) ..." 
+ start_time="$(date -u +%s.%3N)" + ${proxy_execute} & + local proxy_pid=$! + sleep 5 # TODO + stop_time="$(date -u +%s.%3N)" + elapsed="$(bc <<<"$stop_time-$start_time")" + echo -e "${C_AST_GREEN}GekkoFS proxies probably :) running" + echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds" + fi if [[ ${RUN_FOREGROUND} == true ]]; then echo "Press 'q' to exit" while : ; do read -n 1 k <&1 if [[ $k = q ]] ; then - #if [[ ${USE_PROXY} == true ]]; then - # start_time="$(date -u +%s.%3N)" - # echo - # echo -e "${C_AST_GREEN}Shutting down GekkoFS proxies ..." - # if [[ -n ${proxy_pid} ]]; then - # kill -s SIGINT ${proxy_pid} & - # wait ${proxy_pid} - # fi - # stop_time="$(date -u +%s.%3N)" - # elapsed="$(bc <<<"$stop_time-$start_time")" - # echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" - #fi + if [[ ${USE_PROXY} == true ]]; then + start_time="$(date -u +%s.%3N)" + echo + echo -e "${C_AST_GREEN}Shutting down GekkoFS proxies ..." + if [[ -n ${proxy_pid} ]]; then + kill -s SIGINT ${proxy_pid} & + wait ${proxy_pid} + fi + stop_time="$(date -u +%s.%3N)" + elapsed="$(bc <<<"$stop_time-$start_time")" + echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" + fi start_time="$(date -u +%s.%3N)" echo echo -e "${C_AST_GREEN}Shutting down GekkoFS daemons ..." @@ -219,20 +221,48 @@ start_daemon() { fi done else - create_pid_file ${daemon_pid} -# create_pid_file ${proxy_pid} + create_pid_file ${SRUN_DAEMON_PID_FILE} ${daemon_pid} + create_pid_file ${SRUN_PROXY_PID_FILE} ${proxy_pid} fi } ####################################### # Stops GekkoFS daemons for the configured pid file # Globals: # SRUN_DAEMON_PID_FILE +# SRUN_PROXY_PID_FILE # VERBOSE # Outputs: # Writes status to stdout ####################################### stop_daemons() { local pid_file=${SRUN_DAEMON_PID_FILE} + local proxy_pid_file=${SRUN_PROXY_PID_FILE} + # if no daemon or proxy pid file exists, exit + if [[ ! -e ${pid_file} ]] && [[ ! 
-e ${proxy_pid_file} ]]; then + echo -e "${C_AST_RED}No pid files found -> no daemon or proxy running. Exiting ..." + exit 1 + fi + # attempt to shutdown proxy + if [[ -e ${proxy_pid_file} ]]; then + while IFS= read -r line + do + if ps -p "${line}" > /dev/null; then + echo -e "${C_AST_GREEN}Stopping proxy with pid ${line}" + start_time="$(date -u +%s.%3N)" + kill -s SIGINT "${line}" & + # poll pid until it stopped + if [[ ${VERBOSE} == true ]]; then + echo -e "${C_AST_GREEN}Waiting for proxies to exit ..." + fi + timeout 1 tail --pid=${line} -f /dev/null + fi + done < "${proxy_pid_file}" + rm "${proxy_pid_file}" + stop_time="$(date -u +%s.%3N)" + elapsed="$(bc <<<"$stop_time-$start_time")" + echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" + fi + # attempt to shutdown daemon if [[ -e ${pid_file} ]]; then while IFS= read -r line do @@ -251,8 +281,6 @@ stop_daemons() { stop_time="$(date -u +%s.%3N)" elapsed="$(bc <<<"$stop_time-$start_time")" echo -e "${C_AST_GREEN}Shutdown time: ${elapsed} seconds" - else - echo -e "${C_AST_RED}No pid file found -> no daemon running. Exiting ..." 
fi } ####################################### -- GitLab From 06a235cab7818a513ddf060a9473b4cf8ca4d994 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Tue, 23 Apr 2024 17:41:01 +0200 Subject: [PATCH 09/24] gkfs runner script: Add USE_PROXY to config file --- scripts/run/gkfs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/run/gkfs b/scripts/run/gkfs index 993940f9c..503e6db45 100755 --- a/scripts/run/gkfs +++ b/scripts/run/gkfs @@ -222,7 +222,9 @@ start_daemon() { done else create_pid_file ${SRUN_DAEMON_PID_FILE} ${daemon_pid} - create_pid_file ${SRUN_PROXY_PID_FILE} ${proxy_pid} + if [[ ${USE_PROXY} == true ]]; then + create_pid_file ${SRUN_PROXY_PID_FILE} ${proxy_pid} + fi fi } ####################################### @@ -243,7 +245,7 @@ stop_daemons() { exit 1 fi # attempt to shutdown proxy - if [[ -e ${proxy_pid_file} ]]; then + if [[ ${USE_PROXY} == true ]] && [[ -e ${proxy_pid_file} ]]; then while IFS= read -r line do if ps -p "${line}" > /dev/null; then @@ -366,7 +368,7 @@ USE_SRUN=${USE_SRUN} RUN_FOREGROUND=false DAEMON_NUMACTL_=${DAEMON_NUMACTL} PROXY_NUMACTL_=${PROXY_NUMACTL} -USE_PROXY=false +USE_PROXY=${USE_PROXY} # parse input POSITIONAL=() while [[ $# -gt 0 ]]; do -- GitLab From 38f1134241b2a9f5b0cc1f5d63421253d4028024 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 24 Apr 2024 12:55:20 +0200 Subject: [PATCH 10/24] RocksDB backend: Handle NotFoundException on remove --- src/daemon/handler/srv_metadata.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/daemon/handler/srv_metadata.cpp b/src/daemon/handler/srv_metadata.cpp index 7dbdf2fa5..7351b2fb9 100644 --- a/src/daemon/handler/srv_metadata.cpp +++ b/src/daemon/handler/srv_metadata.cpp @@ -253,6 +253,11 @@ rpc_srv_remove_metadata(hg_handle_t handle) { GKFS_DATA->storage()->destroy_chunk_space(in.path); } + } catch(const gkfs::metadata::NotFoundException& e) { + GKFS_DATA->spdlogger()->warn( + "{}(): path '{}' message '{}'. 
Continuing, setting out.err 0.", + __func__, in.path, e.what()); + out.err = 0; } catch(const gkfs::metadata::DBException& e) { GKFS_DATA->spdlogger()->error("{}(): path '{}' message '{}'", __func__, in.path, e.what()); -- GitLab From d6b34b4312484f3685ae34f71a26d8cd1d1d9223 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 24 Apr 2024 13:20:31 +0200 Subject: [PATCH 11/24] Adding proxy operation to remove directory --- src/client/gkfs_functions.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index e452ef2f9..0495801bf 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -1347,7 +1347,11 @@ gkfs_rmdir(const std::string& path) { errno = ENOTEMPTY; return -1; } - err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); + if(gkfs::config::proxy::fwd_remove && CTX->use_proxy()) { + err = gkfs::rpc::forward_remove_proxy(path); + } else { + err = gkfs::rpc::forward_remove(path, CTX->get_replicas()); + } if(err) { errno = err; return -1; -- GitLab From fe50f47c946ae63f6e4479a11ca53025dc722f72 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 14 Jun 2024 10:44:01 +0200 Subject: [PATCH 12/24] Adding gfind to examples when MPI was found --- examples/gfind/CMakeLists.txt | 21 +++++++++++++++++++-- examples/gfind/gfind.cpp | 2 -- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/examples/gfind/CMakeLists.txt b/examples/gfind/CMakeLists.txt index fef70119d..b36f8b0e3 100644 --- a/examples/gfind/CMakeLists.txt +++ b/examples/gfind/CMakeLists.txt @@ -27,10 +27,27 @@ ################################################################################ set (CMAKE_CXX_STANDARD 17) -add_executable(sfind sfind.cpp) +add_executable(sfind sfind.cpp) +set_property(TARGET sfind PROPERTY POSITION_INDEPENDENT_CODE ON) if(GKFS_INSTALL_TESTS) - install(TARGETS sfind + install(TARGETS sfind DESTINATION ${CMAKE_INSTALL_BINDIR} ) endif() + +find_package(MPI) +if 
(MPI_FOUND) + message(STATUS "[gekkofs] MPI was found. Building gfind example") + add_executable(gfind gfind.cpp) + set_property(TARGET gfind PROPERTY POSITION_INDEPENDENT_CODE ON) + target_link_libraries(gfind + PUBLIC + MPI::MPI_CXX + ) + if(GKFS_INSTALL_TESTS) + install(TARGETS gfind + DESTINATION ${CMAKE_INSTALL_BINDIR} + ) + endif() +endif() diff --git a/examples/gfind/gfind.cpp b/examples/gfind/gfind.cpp index 0bcf6c090..b1c291ef6 100644 --- a/examples/gfind/gfind.cpp +++ b/examples/gfind/gfind.cpp @@ -378,7 +378,6 @@ int process(char *processor_name, int world_rank, int world_size, // INIT PFIND memset(&runtime, 0, sizeof(pfind_runtime_options_t)); - int ret; /* Get timestamp file */ if (opt->timestamp_file) { if (pfind_rank == 0) { @@ -393,7 +392,6 @@ int process(char *processor_name, int world_rank, int world_size, MPI_Bcast(&runtime.ctime_min, 1, MPI_INT, 0, pfind_com); } - auto iterations = 0; if (world_rank == 0) { queue dirs; string workdir = opt->workdir; -- GitLab From 13f3676afe6829bfa841a5016916171b2e295136 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 14 Jun 2024 11:05:28 +0200 Subject: [PATCH 13/24] gfind/sfind fix find argument order enforcement --- examples/gfind/gfind.cpp | 85 ++++++++++++++++++++++------------------ examples/gfind/sfind.cpp | 20 +++++++--- 2 files changed, 61 insertions(+), 44 deletions(-) diff --git a/examples/gfind/gfind.cpp b/examples/gfind/gfind.cpp index b1c291ef6..33929153d 100644 --- a/examples/gfind/gfind.cpp +++ b/examples/gfind/gfind.cpp @@ -200,6 +200,14 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help, } argv[i][0] = 0; argv[++i][0] = 0; + } else if(strcmp(argv[i], "-M") == 0) { + res->mountdir = strdup(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-S") == 0) { + res->num_servers = atoi(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; } else if (!firstarg) { firstarg = strdup(argv[i]); argv[i][0] = 0; @@ -254,12 +262,12 @@ pfind_options_t 
*pfind_parse_args(int argc, char **argv, int force_print_help, case 's': res->stonewall_timer = atol(optarg); break; - case 'S': - res->num_servers = atoi(optarg); - break; - case 'M': - res->mountdir = strdup(optarg); - break; + // case 'S': + // res->num_servers = atoi(optarg); + // break; + // case 'M': + // res->mountdir = strdup(optarg); + // break; case 'v': res->verbosity++; break; @@ -303,9 +311,9 @@ string recv_newPath() { MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); if (count == 0) return "Terminate"; - char buf[count]; - MPI_Bcast(buf, count, MPI_CHAR, 0, MPI_COMM_WORLD); - return buf; + std::vector buf(count); + MPI_Bcast(buf.data(), count, MPI_CHAR, 0, MPI_COMM_WORLD); + return std::string(buf.begin(), buf.end()); } /* Client Processing a path. @@ -325,13 +333,13 @@ void dirProcess(const string path, unsigned long long &checked, // cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path << // endl; - + int servers_per_node = ceil(opt->num_servers / (world_size - 1)); if (servers_per_node == 0) servers_per_node++; for (int it = 0; it < servers_per_node; it++) { auto server = (world_rank - 1) * servers_per_node + it; - if (server >= opt->num_servers) + if (server >= (unsigned int) opt->num_servers) break; unsigned long long total_size = 0; @@ -340,7 +348,7 @@ void dirProcess(const string path, unsigned long long &checked, (sizeof(struct dirent_extended) + 255) * 1024 * 100, server); struct dirent_extended *temp = getdir; - while (total_size < n) { + while (total_size < (unsigned long long) n) { if (strlen(temp->d_name) == 0) break; total_size += temp->d_reclen; @@ -401,36 +409,37 @@ int process(char *processor_name, int world_rank, int world_size, dirs.push(workdir); do { + std::string processpath = dirs.front(); + dirs.pop(); + send_newPath(processpath); + + auto received_strings = true; + + for (auto i = 1; i < world_size; i++) { + received_strings = true; + while (received_strings) { + received_strings = false; - string 
processpath = dirs.front(); - dirs.pop(); - // DISTRIBUTE WORK - send_newPath(processpath); - auto received_strings = true; - // We need to gather new directories found (we use send-recv) - for (auto i = 1; i < world_size; i++) { - received_strings = true; - while (received_strings) { - received_strings = false; - // cout << " Checking from " << i << endl; - MPI_Status mpistatus; - MPI_Probe(i, 0, MPI_COMM_WORLD, &mpistatus); - int count; - MPI_Get_count(&mpistatus, MPI_CHAR, &count); - char buf[count]; - MPI_Recv(&buf, count, MPI_CHAR, i, 0, MPI_COMM_WORLD, &mpistatus); - if (count == 0) { - continue; - } - // cout << " Receiving from " << i << " ---- " << buf << endl; - string s = buf; - dirs.push(s); - received_strings = true; + MPI_Status mpistatus; + MPI_Probe(i, 0, MPI_COMM_WORLD, &mpistatus); + + int count; + MPI_Get_count(&mpistatus, MPI_CHAR, &count); + + std::vector buf(count); + MPI_Recv(buf.data(), count, MPI_CHAR, i, 0, MPI_COMM_WORLD, &mpistatus); + + if (count == 0) { + continue; + } + std::string s(buf.begin(), buf.end()); + dirs.push(s); + received_strings = true; + } } - } - // cout << "NO more paths " << dirs.size() << endl; } while (!dirs.empty()); + auto count = 0; MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); diff --git a/examples/gfind/sfind.cpp b/examples/gfind/sfind.cpp index 4e31301b2..27570da25 100644 --- a/examples/gfind/sfind.cpp +++ b/examples/gfind/sfind.cpp @@ -209,6 +209,14 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){ } argv[i][0] = 0; argv[++i][0] = 0; + } else if(strcmp(argv[i], "-M") == 0) { + res->mountdir = strdup(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-S") == 0) { + res->num_servers = atoi(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; } else if (!firstarg) { firstarg = strdup(argv[i]); argv[i][0] = 0; @@ -263,12 +271,12 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){ case 's': res->stonewall_timer = 
atol(optarg); break; - case 'S': - res->num_servers = atoi(optarg); - break; - case 'M': - res->mountdir = strdup(optarg); - break; + // case 'S': + // res->num_servers = atoi(optarg); + // break; + // case 'M': + // res->mountdir = strdup(optarg); + // break; case 'v': res->verbosity++; break; -- GitLab From 94e84f40e65e0da1ec683aee9270084612c7b944 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 14 Jun 2024 11:06:17 +0200 Subject: [PATCH 14/24] Reformat examples --- examples/gfind/gfind.cpp | 814 ++++++++++++++++++++------------------- examples/gfind/sfind.cpp | 628 +++++++++++++++--------------- 2 files changed, 725 insertions(+), 717 deletions(-) diff --git a/examples/gfind/gfind.cpp b/examples/gfind/gfind.cpp index 33929153d..c9341d0ce 100644 --- a/examples/gfind/gfind.cpp +++ b/examples/gfind/gfind.cpp @@ -49,271 +49,270 @@ using namespace std; /* Minimal struct needed for io500 find */ /* We could also do the filtering on the server */ struct dirent_extended { - size_t size; - time_t ctime; - unsigned short d_reclen; - unsigned char d_type; - char d_name[1]; + size_t size; + time_t ctime; + unsigned short d_reclen; + unsigned char d_type; + char d_name[1]; }; /* Function exported from GekkoFS LD_PRELOAD, code needs to be compiled with * -fPIC, if not will segfault */ -extern "C" int gkfs_getsingleserverdir(const char *path, - struct dirent_extended *dirp, - unsigned int count, int server) - __attribute__((weak)); +extern "C" int +gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, + unsigned int count, int server) __attribute__((weak)); /* PFIND OPTIONS EXTENDED We need to add the GekkoFS mount dir and the number of * servers */ typedef struct { - char * workdir; - int just_count; - int print_by_process; - char *results_dir; - int stonewall_timer; - int print_rates; - - char *timestamp_file; - char *name_pattern; - regex_t name_regex; - uint64_t size; - - int num_servers; - char *mountdir; - // optimizing parameters NOT USED - int 
queue_length; - int max_entries_per_iter; - int steal_from_next; // if true, then steal from the next process - int parallel_single_dir_access; // if 1, use hashing to parallelize single - // directory access, if 2 sequential increment - - int verbosity; + char* workdir; + int just_count; + int print_by_process; + char* results_dir; + int stonewall_timer; + int print_rates; + + char* timestamp_file; + char* name_pattern; + regex_t name_regex; + uint64_t size; + + int num_servers; + char* mountdir; + // optimizing parameters NOT USED + int queue_length; + int max_entries_per_iter; + int steal_from_next; // if true, then steal from the next process + int parallel_single_dir_access; // if 1, use hashing to parallelize single + // directory access, if 2 sequential + // increment + + int verbosity; } pfind_options_t; typedef struct { - uint64_t ctime_min; - double stonewall_endtime; - FILE *logfile; - int needs_stat; + uint64_t ctime_min; + double stonewall_endtime; + FILE* logfile; + int needs_stat; } pfind_runtime_options_t; static pfind_runtime_options_t runtime; int pfind_rank; -static pfind_options_t *opt; +static pfind_options_t* opt; -void pfind_abort(const string str) { - printf("%s", str.c_str()); - exit(1); +void +pfind_abort(const string str) { + printf("%s", str.c_str()); + exit(1); } -static void pfind_print_help(pfind_options_t *res) { - printf("pfind \nSynopsis:\n" - "pfind [-newer ] [-size c] [-name " - "] [-regex ] [-S ] [-M ]\n" - "\tworkdir = \"%s\"\n" - "\t-newer = \"%s\"\n" - "\t-name|-regex = \"%s\"\n" - "\t-S: num servers = \"%d\"\n" - "\t-M: mountdir = \"%s\"\n" - "Optional flags\n" - "\t-h: prints the help\n" - "\t--help: prints the help without initializing MPI\n", - res->workdir, res->timestamp_file, res->name_pattern, res->num_servers, - res->mountdir); +static void +pfind_print_help(pfind_options_t* res) { + printf("pfind \nSynopsis:\n" + "pfind [-newer ] [-size c] [-name " + "] [-regex ] [-S ] [-M ]\n" + "\tworkdir = \"%s\"\n" + "\t-newer = 
\"%s\"\n" + "\t-name|-regex = \"%s\"\n" + "\t-S: num servers = \"%d\"\n" + "\t-M: mountdir = \"%s\"\n" + "Optional flags\n" + "\t-h: prints the help\n" + "\t--help: prints the help without initializing MPI\n", + res->workdir, res->timestamp_file, res->name_pattern, + res->num_servers, res->mountdir); } MPI_Comm pfind_com; int pfind_size; -pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help, - MPI_Comm com) { - MPI_Comm_rank(com, &pfind_rank); - MPI_Comm_size(com, &pfind_size); - pfind_com = com; - - pfind_options_t *res = (pfind_options_t *)malloc(sizeof(pfind_options_t)); - memset(res, 0, sizeof(pfind_options_t)); - int print_help = force_print_help; - - res->workdir = nullptr; - res->results_dir = nullptr; - res->verbosity = 0; - res->timestamp_file = nullptr; - res->name_pattern = nullptr; - res->size = std::numeric_limits::max(); - res->queue_length = 100000; - res->max_entries_per_iter = 1000; - char *firstarg = nullptr; - - // when we find special args, we process them - // but we need to replace them with 0 so that getopt will ignore them - // and getopt will continue to process beyond them - for (auto i = 1; i < argc - 1; i++) { - if (strcmp(argv[i], "-newer") == 0) { - res->timestamp_file = strdup(argv[i + 1]); - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (strcmp(argv[i], "-size") == 0) { - char *str = argv[i + 1]; - char extension = str[strlen(str) - 1]; - str[strlen(str) - 1] = 0; - res->size = atoll(str); - switch (extension) { - case 'c': - break; - default: - pfind_abort("Unsupported exension for -size\n"); - } - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (strcmp(argv[i], "-name") == 0) { - res->name_pattern = (char *)malloc(strlen(argv[i + 1]) * 4 + 100); - // transform a traditional name pattern to a regex: - char *str = argv[i + 1]; - char *out = res->name_pattern; - int pos = 0; - for (unsigned i = 0; i < strlen(str); i++) { - if (str[i] == '*') { - pos += sprintf(out + pos, ".*"); - } else if (str[i] == '.') { 
- pos += sprintf(out + pos, "[.]"); - } else if (str[i] == '"' || str[i] == '\"') { - // erase the " - } else { - out[pos] = str[i]; - pos++; +pfind_options_t* +pfind_parse_args(int argc, char** argv, int force_print_help, MPI_Comm com) { + MPI_Comm_rank(com, &pfind_rank); + MPI_Comm_size(com, &pfind_size); + pfind_com = com; + + pfind_options_t* res = (pfind_options_t*) malloc(sizeof(pfind_options_t)); + memset(res, 0, sizeof(pfind_options_t)); + int print_help = force_print_help; + + res->workdir = nullptr; + res->results_dir = nullptr; + res->verbosity = 0; + res->timestamp_file = nullptr; + res->name_pattern = nullptr; + res->size = std::numeric_limits::max(); + res->queue_length = 100000; + res->max_entries_per_iter = 1000; + char* firstarg = nullptr; + + // when we find special args, we process them + // but we need to replace them with 0 so that getopt will ignore them + // and getopt will continue to process beyond them + for(auto i = 1; i < argc - 1; i++) { + if(strcmp(argv[i], "-newer") == 0) { + res->timestamp_file = strdup(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-size") == 0) { + char* str = argv[i + 1]; + char extension = str[strlen(str) - 1]; + str[strlen(str) - 1] = 0; + res->size = atoll(str); + switch(extension) { + case 'c': + break; + default: + pfind_abort("Unsupported exension for -size\n"); + } + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-name") == 0) { + res->name_pattern = (char*) malloc(strlen(argv[i + 1]) * 4 + 100); + // transform a traditional name pattern to a regex: + char* str = argv[i + 1]; + char* out = res->name_pattern; + int pos = 0; + for(unsigned i = 0; i < strlen(str); i++) { + if(str[i] == '*') { + pos += sprintf(out + pos, ".*"); + } else if(str[i] == '.') { + pos += sprintf(out + pos, "[.]"); + } else if(str[i] == '"' || str[i] == '\"') { + // erase the " + } else { + out[pos] = str[i]; + pos++; + } + } + out[pos] = 0; + + auto ret = regcomp(&res->name_regex, 
res->name_pattern, 0); + if(ret) { + pfind_abort("Invalid regex for name given\n"); + } + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-regex") == 0) { + res->name_pattern = strdup(argv[i + 1]); + auto ret = regcomp(&res->name_regex, res->name_pattern, 0); + if(ret) { + pfind_abort("Invalid regex for name given\n"); + } + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-M") == 0) { + res->mountdir = strdup(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-S") == 0) { + res->num_servers = atoi(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(!firstarg) { + firstarg = strdup(argv[i]); + argv[i][0] = 0; } - } - out[pos] = 0; - - auto ret = regcomp(&res->name_regex, res->name_pattern, 0); - if (ret) { - pfind_abort("Invalid regex for name given\n"); - } - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (strcmp(argv[i], "-regex") == 0) { - res->name_pattern = strdup(argv[i + 1]); - auto ret = regcomp(&res->name_regex, res->name_pattern, 0); - if (ret) { - pfind_abort("Invalid regex for name given\n"); - } - argv[i][0] = 0; - argv[++i][0] = 0; - } else if(strcmp(argv[i], "-M") == 0) { - res->mountdir = strdup(argv[i + 1]); - argv[i][0] = 0; - argv[++i][0] = 0; - } else if(strcmp(argv[i], "-S") == 0) { - res->num_servers = atoi(argv[i + 1]); - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (!firstarg) { - firstarg = strdup(argv[i]); - argv[i][0] = 0; } - } - if (argc == 2) { - firstarg = strdup(argv[1]); - } - - int c; - while ((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) { - if (c == -1) { - break; + if(argc == 2) { + firstarg = strdup(argv[1]); } - switch (c) { - case 'H': - res->parallel_single_dir_access = atoi(optarg); - break; - case 'N': - res->steal_from_next = 1; - break; - case 'x': - /* ignore fake arg that we added when we processed the extra args */ - break; - case 'P': - res->print_by_process = 1; - break; - case 'C': - res->just_count = 1; - break; - case 'D': - 
if (strcmp(optarg, "rates") == 0) { - res->print_rates = 1; - } else { - pfind_abort("Unsupported debug flag\n"); - } - break; - case 'h': - print_help = 1; - break; - case 'r': - res->results_dir = strdup(optarg); - break; - case 'q': - res->queue_length = atoi(optarg); - break; - if (res->queue_length < 10) { - pfind_abort("Queue must be at least 10 elements!\n"); - } - break; - case 's': - res->stonewall_timer = atol(optarg); - break; - // case 'S': - // res->num_servers = atoi(optarg); - // break; - // case 'M': - // res->mountdir = strdup(optarg); - // break; - case 'v': - res->verbosity++; - break; - case 0: - break; + int c; + while((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) { + if(c == -1) { + break; + } + + switch(c) { + case 'H': + res->parallel_single_dir_access = atoi(optarg); + break; + case 'N': + res->steal_from_next = 1; + break; + case 'x': + /* ignore fake arg that we added when we processed the extra + * args */ + break; + case 'P': + res->print_by_process = 1; + break; + case 'C': + res->just_count = 1; + break; + case 'D': + if(strcmp(optarg, "rates") == 0) { + res->print_rates = 1; + } else { + pfind_abort("Unsupported debug flag\n"); + } + break; + case 'h': + print_help = 1; + break; + case 'r': + res->results_dir = strdup(optarg); + break; + case 'q': + res->queue_length = atoi(optarg); + break; + if(res->queue_length < 10) { + pfind_abort("Queue must be at least 10 elements!\n"); + } + break; + case 's': + res->stonewall_timer = atol(optarg); + break; + case 'v': + res->verbosity++; + break; + case 0: + break; + } } - } - if (res->verbosity > 2 && pfind_rank == 0) { - printf("Regex: %s\n", res->name_pattern); - } - - if (print_help) { - if (pfind_rank == 0) - pfind_print_help(res); - int init; - MPI_Initialized(&init); - if (init) { - MPI_Finalize(); + if(res->verbosity > 2 && pfind_rank == 0) { + printf("Regex: %s\n", res->name_pattern); } - exit(0); - } - if (!firstarg) { - pfind_abort("Error: pfind \n"); - } - res->workdir 
= firstarg; + if(print_help) { + if(pfind_rank == 0) + pfind_print_help(res); + int init; + MPI_Initialized(&init); + if(init) { + MPI_Finalize(); + } + exit(0); + } - return res; + if(!firstarg) { + pfind_abort("Error: pfind \n"); + } + res->workdir = firstarg; + + return res; } /* Master send a new path to the workers */ -void send_newPath(string path) { - auto count = path.size() + 1; - MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Bcast((void *)path.c_str(), count, MPI_CHAR, 0, MPI_COMM_WORLD); +void +send_newPath(string path) { + auto count = path.size() + 1; + MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast((void*) path.c_str(), count, MPI_CHAR, 0, MPI_COMM_WORLD); } /* Clients get a new path, getting a "0" size char means there is no new path*/ -string recv_newPath() { - int count; - MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (count == 0) - return "Terminate"; - std::vector buf(count); - MPI_Bcast(buf.data(), count, MPI_CHAR, 0, MPI_COMM_WORLD); - return std::string(buf.begin(), buf.end()); +string +recv_newPath() { + int count; + MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); + if(count == 0) + return "Terminate"; + std::vector buf(count); + MPI_Bcast(buf.data(), count, MPI_CHAR, 0, MPI_COMM_WORLD); + return std::string(buf.begin(), buf.end()); } /* Client Processing a path. 
@@ -323,214 +322,221 @@ string recv_newPath() { * server, which is enough for most cases * */ -void dirProcess(const string path, unsigned long long &checked, - unsigned long long &found, queue &dirs, - unsigned int world_rank, unsigned int world_size, - pfind_options_t *opt) { - struct dirent_extended *getdir = (struct dirent_extended *)malloc( - (sizeof(struct dirent_extended) + 255) * 1024 * 100); - memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100); - // cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path << - // endl; - - - int servers_per_node = ceil(opt->num_servers / (world_size - 1)); - if (servers_per_node == 0) - servers_per_node++; - for (int it = 0; it < servers_per_node; it++) { - auto server = (world_rank - 1) * servers_per_node + it; - if (server >= (unsigned int) opt->num_servers) - break; - - unsigned long long total_size = 0; - auto n = gkfs_getsingleserverdir( - path.c_str(), getdir, - (sizeof(struct dirent_extended) + 255) * 1024 * 100, server); - struct dirent_extended *temp = getdir; - - while (total_size < (unsigned long long) n) { - if (strlen(temp->d_name) == 0) - break; - total_size += temp->d_reclen; - /* Queue directory to process */ - if (temp->d_type == 1) { - string slash; - if (path[path.size() - 1] != '/') - slash = "/"; - checked++; - dirs.push(path + slash + temp->d_name); - temp = reinterpret_cast(reinterpret_cast(temp) + - temp->d_reclen); - continue; - } - /* Find filtering */ - auto timeOK = true; - if (opt->timestamp_file) { - if ((uint64_t)temp->ctime < runtime.ctime_min) - timeOK = false; +void +dirProcess(const string path, unsigned long long& checked, + unsigned long long& found, queue& dirs, + unsigned int world_rank, unsigned int world_size, + pfind_options_t* opt) { + struct dirent_extended* getdir = (struct dirent_extended*) malloc( + (sizeof(struct dirent_extended) + 255) * 1024 * 100); + memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100); + // cout << 
"PROCESSING " << world_rank << "/"<< world_size << " = " << path + // << endl; + + + int servers_per_node = ceil(opt->num_servers / (world_size - 1)); + if(servers_per_node == 0) + servers_per_node++; + for(int it = 0; it < servers_per_node; it++) { + auto server = (world_rank - 1) * servers_per_node + it; + if(server >= (unsigned int) opt->num_servers) + break; + + unsigned long long total_size = 0; + auto n = gkfs_getsingleserverdir( + path.c_str(), getdir, + (sizeof(struct dirent_extended) + 255) * 1024 * 100, server); + struct dirent_extended* temp = getdir; + + while(total_size < (unsigned long long) n) { + if(strlen(temp->d_name) == 0) + break; + total_size += temp->d_reclen; + /* Queue directory to process */ + if(temp->d_type == 1) { + string slash; + if(path[path.size() - 1] != '/') + slash = "/"; + checked++; + dirs.push(path + slash + temp->d_name); + temp = reinterpret_cast( + reinterpret_cast(temp) + temp->d_reclen); + continue; + } + /* Find filtering */ + auto timeOK = true; + if(opt->timestamp_file) { + if((uint64_t) temp->ctime < runtime.ctime_min) + timeOK = false; + } + if(timeOK and (temp->size == opt->size or + opt->size == std::numeric_limits::max())) + if(!(opt->name_pattern && + regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0))) + found++; + checked++; + temp = reinterpret_cast( + reinterpret_cast(temp) + temp->d_reclen); } - if (timeOK and (temp->size == opt->size or opt->size == std::numeric_limits::max())) - if (!(opt->name_pattern && - regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0))) - found++; - checked++; - temp = - reinterpret_cast(reinterpret_cast(temp) + temp->d_reclen); - } - } + } } -int process(char *processor_name, int world_rank, int world_size, - pfind_options_t *opt) { - // Print off a hello world message - - // INIT PFIND - memset(&runtime, 0, sizeof(pfind_runtime_options_t)); - /* Get timestamp file */ - if (opt->timestamp_file) { - if (pfind_rank == 0) { - static struct stat timer_file{}; - if 
(lstat(opt->timestamp_file, &timer_file) != 0) { - printf("Could not open: \"%s\", error: %s", opt->timestamp_file, - strerror(errno)); - pfind_abort("\n"); - } - runtime.ctime_min = timer_file.st_ctime; +int +process(char* processor_name, int world_rank, int world_size, + pfind_options_t* opt) { + // Print off a hello world message + + // INIT PFIND + memset(&runtime, 0, sizeof(pfind_runtime_options_t)); + /* Get timestamp file */ + if(opt->timestamp_file) { + if(pfind_rank == 0) { + static struct stat timer_file{}; + if(lstat(opt->timestamp_file, &timer_file) != 0) { + printf("Could not open: \"%s\", error: %s", opt->timestamp_file, + strerror(errno)); + pfind_abort("\n"); + } + runtime.ctime_min = timer_file.st_ctime; + } + MPI_Bcast(&runtime.ctime_min, 1, MPI_INT, 0, pfind_com); } - MPI_Bcast(&runtime.ctime_min, 1, MPI_INT, 0, pfind_com); - } - if (world_rank == 0) { - queue dirs; - string workdir = opt->workdir; - workdir = workdir.substr(strlen(opt->mountdir), workdir.size()); - if (workdir.size() == 0) - workdir = "/"; - dirs.push(workdir); + if(world_rank == 0) { + queue dirs; + string workdir = opt->workdir; + workdir = workdir.substr(strlen(opt->mountdir), workdir.size()); + if(workdir.size() == 0) + workdir = "/"; + dirs.push(workdir); - do { - std::string processpath = dirs.front(); - dirs.pop(); - send_newPath(processpath); + do { + std::string processpath = dirs.front(); + dirs.pop(); + send_newPath(processpath); - auto received_strings = true; + auto received_strings = true; - for (auto i = 1; i < world_size; i++) { - received_strings = true; - while (received_strings) { - received_strings = false; + for(auto i = 1; i < world_size; i++) { + received_strings = true; + while(received_strings) { + received_strings = false; - MPI_Status mpistatus; - MPI_Probe(i, 0, MPI_COMM_WORLD, &mpistatus); + MPI_Status mpistatus; + MPI_Probe(i, 0, MPI_COMM_WORLD, &mpistatus); - int count; - MPI_Get_count(&mpistatus, MPI_CHAR, &count); + int count; + 
MPI_Get_count(&mpistatus, MPI_CHAR, &count); - std::vector buf(count); - MPI_Recv(buf.data(), count, MPI_CHAR, i, 0, MPI_COMM_WORLD, &mpistatus); + std::vector buf(count); + MPI_Recv(buf.data(), count, MPI_CHAR, i, 0, MPI_COMM_WORLD, + &mpistatus); - if (count == 0) { - continue; + if(count == 0) { + continue; + } + std::string s(buf.begin(), buf.end()); + dirs.push(s); + received_strings = true; } - std::string s(buf.begin(), buf.end()); - dirs.push(s); - received_strings = true; } - } - } while (!dirs.empty()); + } while(!dirs.empty()); - auto count = 0; - MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); + auto count = 0; + MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); - unsigned long long *Array_checked = - (unsigned long long *)malloc(sizeof(unsigned long long) * world_size); - unsigned long long *Array_found = - (unsigned long long *)malloc(sizeof(unsigned long long) * world_size); - unsigned long long checked = 0; - unsigned long long found = 0; + unsigned long long* Array_checked = (unsigned long long*) malloc( + sizeof(unsigned long long) * world_size); + unsigned long long* Array_found = (unsigned long long*) malloc( + sizeof(unsigned long long) * world_size); + unsigned long long checked = 0; + unsigned long long found = 0; - MPI_Gather(&checked, 1, MPI_UNSIGNED_LONG_LONG, Array_checked, 1, - MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); - MPI_Gather(&found, 1, MPI_UNSIGNED_LONG_LONG, Array_found, 1, - MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); + MPI_Gather(&checked, 1, MPI_UNSIGNED_LONG_LONG, Array_checked, 1, + MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); + MPI_Gather(&found, 1, MPI_UNSIGNED_LONG_LONG, Array_found, 1, + MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); - for (int i = 0; i < world_size; i++) { - checked += Array_checked[i]; - found += Array_found[i]; - } + for(int i = 0; i < world_size; i++) { + checked += Array_checked[i]; + found += Array_found[i]; + } - cout << "MATCHED " << 
found << "/" << checked << endl; - } - - else { - unsigned long long checked = 0; - unsigned long long found = 0; - while (1) { - - string toProcess = recv_newPath(); - if (toProcess == "Terminate") { - break; - } - // cout << "REceived " << toProcess << " --- " << world_rank << endl; - queue dirs; - - dirProcess(toProcess, checked, found, dirs, world_rank, world_size, opt); - // Send NEW DIRS to master - while (!dirs.empty()) { - string s = dirs.front(); - dirs.pop(); - // cout << world_rank << " --> Sending " << s << endl; - MPI_Send((void *)s.c_str(), (s.size() + 1), MPI_CHAR, 0, 0, - MPI_COMM_WORLD); - } - // cout << world_rank << " --> Sending 0 " << endl; - MPI_Send((void *)0, 0, MPI_CHAR, 0, 0, MPI_COMM_WORLD); + cout << "MATCHED " << found << "/" << checked << endl; } - MPI_Barrier(MPI_COMM_WORLD); - MPI_Gather(&checked, 1, MPI_UNSIGNED_LONG_LONG, nullptr, 1, - MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); - MPI_Gather(&found, 1, MPI_UNSIGNED_LONG_LONG, nullptr, 1, - MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); - } + else { + unsigned long long checked = 0; + unsigned long long found = 0; + while(1) { - return 0; + string toProcess = recv_newPath(); + if(toProcess == "Terminate") { + break; + } + // cout << "REceived " << toProcess << " --- " << world_rank << + // endl; + queue dirs; + + dirProcess(toProcess, checked, found, dirs, world_rank, world_size, + opt); + // Send NEW DIRS to master + while(!dirs.empty()) { + string s = dirs.front(); + dirs.pop(); + // cout << world_rank << " --> Sending " << s << endl; + MPI_Send((void*) s.c_str(), (s.size() + 1), MPI_CHAR, 0, 0, + MPI_COMM_WORLD); + } + // cout << world_rank << " --> Sending 0 " << endl; + MPI_Send((void*) 0, 0, MPI_CHAR, 0, 0, MPI_COMM_WORLD); + } + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Gather(&checked, 1, MPI_UNSIGNED_LONG_LONG, nullptr, 1, + MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); + MPI_Gather(&found, 1, MPI_UNSIGNED_LONG_LONG, nullptr, 1, + MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); + } + + 
return 0; } -int main(int argc, char **argv) { +int +main(int argc, char** argv) { - for (int i = 0; i < argc; i++) { - if (strcmp(argv[i], "--help") == 0) { - argv[i][0] = 0; - pfind_rank = 0; - pfind_parse_args(argc, argv, 1, MPI_COMM_SELF); - exit(0); + for(int i = 0; i < argc; i++) { + if(strcmp(argv[i], "--help") == 0) { + argv[i][0] = 0; + pfind_rank = 0; + pfind_parse_args(argc, argv, 1, MPI_COMM_SELF); + exit(0); + } } - } - // Initialize the MPI environment - MPI_Init(&argc, &argv); + // Initialize the MPI environment + MPI_Init(&argc, &argv); - // Get the number of processes - int world_size; - MPI_Comm_size(MPI_COMM_WORLD, &world_size); + // Get the number of processes + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); - // Get the rank of the process - int world_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + // Get the rank of the process + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - opt = pfind_parse_args(argc, argv, 0, MPI_COMM_WORLD); - // cout << opt->num_servers << " -- " << opt->mountdir << endl; - // Get the name of the processor - char processor_name[MPI_MAX_PROCESSOR_NAME]; - int name_len; - MPI_Get_processor_name(processor_name, &name_len); + opt = pfind_parse_args(argc, argv, 0, MPI_COMM_WORLD); + // cout << opt->num_servers << " -- " << opt->mountdir << endl; + // Get the name of the processor + char processor_name[MPI_MAX_PROCESSOR_NAME]; + int name_len; + MPI_Get_processor_name(processor_name, &name_len); - process(processor_name, world_rank, world_size, opt); + process(processor_name, world_rank, world_size, opt); - // Finalize the MPI environment. - MPI_Finalize(); + // Finalize the MPI environment. 
+ MPI_Finalize(); } diff --git a/examples/gfind/sfind.cpp b/examples/gfind/sfind.cpp index 27570da25..cb22acda6 100644 --- a/examples/gfind/sfind.cpp +++ b/examples/gfind/sfind.cpp @@ -48,258 +48,256 @@ using namespace std; /* Minimal struct needed for io500 find */ /* We could also do the filtering on the server */ struct dirent_extended { - size_t size; - time_t ctime; - unsigned short d_reclen; - unsigned char d_type; - char d_name[1]; + size_t size; + time_t ctime; + unsigned short d_reclen; + unsigned char d_type; + char d_name[1]; }; /* Function exported from GekkoFS LD_PRELOAD, code needs to be compiled with * -fPIC */ -extern "C" int gkfs_getsingleserverdir(const char *path, - struct dirent_extended *dirp, - unsigned int count, int server) - __attribute__((weak)); +extern "C" int +gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, + unsigned int count, int server) __attribute__((weak)); /* PFIND OPTIONS EXTENDED We need to add the GekkoFS mount dir and the number of * servers */ typedef struct { - char* workdir; - int just_count; - int print_by_process; - char *results_dir; - int stonewall_timer; - int print_rates; - - char *timestamp_file; - char *name_pattern; - regex_t name_regex; - uint64_t size; - - int num_servers; - char *mountdir; - // optimizing parameters NOT USED - int queue_length; - int max_entries_per_iter; - int steal_from_next; // if true, then steal from the next process - int parallel_single_dir_access; // if 1, use hashing to parallelize single - // directory access, if 2 sequential increment - - int verbosity; + char* workdir; + int just_count; + int print_by_process; + char* results_dir; + int stonewall_timer; + int print_rates; + + char* timestamp_file; + char* name_pattern; + regex_t name_regex; + uint64_t size; + + int num_servers; + char* mountdir; + // optimizing parameters NOT USED + int queue_length; + int max_entries_per_iter; + int steal_from_next; // if true, then steal from the next process + int 
parallel_single_dir_access; // if 1, use hashing to parallelize single + // directory access, if 2 sequential + // increment + + int verbosity; } pfind_options_t; typedef struct { - uint64_t ctime_min; - double stonewall_endtime; - FILE *logfile; - int needs_stat; + uint64_t ctime_min; + double stonewall_endtime; + FILE* logfile; + int needs_stat; } pfind_runtime_options_t; static pfind_runtime_options_t runtime; int pfind_rank; -static pfind_options_t *opt; +static pfind_options_t* opt; -void pfind_abort(const std::string str) { - printf("%s", str.c_str()); - exit(1); +void +pfind_abort(const std::string str) { + printf("%s", str.c_str()); + exit(1); } -static void pfind_print_help(pfind_options_t *res) { - printf("pfind \nSynopsis:\n" - "pfind [-newer ] [-size c] [-name " - "] [-regex ] [-S ] [-M ]\n" - "\tworkdir = \"%s\"\n" - "\t-newer = \"%s\"\n" - "\t-name|-regex = \"%s\"\n" - "\t-S: num servers = \"%d\"\n" - "\t-M: mountdir = \"%s\"\n" - "Optional flags\n" - "\t-h: prints the help\n" - "\t--help: prints the help without initializing MPI\n",res->workdir, - res->timestamp_file, res->name_pattern, res->num_servers, - res->mountdir ); +static void +pfind_print_help(pfind_options_t* res) { + printf("pfind \nSynopsis:\n" + "pfind [-newer ] [-size c] [-name " + "] [-regex ] [-S ] [-M ]\n" + "\tworkdir = \"%s\"\n" + "\t-newer = \"%s\"\n" + "\t-name|-regex = \"%s\"\n" + "\t-S: num servers = \"%d\"\n" + "\t-M: mountdir = \"%s\"\n" + "Optional flags\n" + "\t-h: prints the help\n" + "\t--help: prints the help without initializing MPI\n", + res->workdir, res->timestamp_file, res->name_pattern, + res->num_servers, res->mountdir); } int pfind_size; -pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){ - - pfind_rank = 0; - pfind_size = 1; - - pfind_options_t *res = (pfind_options_t *)malloc(sizeof(pfind_options_t)); - // Init Values - res->just_count = 0; - res->print_by_process = 0; - res->stonewall_timer = 0; - res->print_rates = 0; - 
res->name_regex = {}; - res->num_servers = 0; - res->mountdir = nullptr; - res->queue_length = 0; - res->max_entries_per_iter = 0; - res->steal_from_next = 0; - res->parallel_single_dir_access = 0; - - auto print_help = force_print_help; - res->workdir = nullptr; - res->results_dir = nullptr; - res->verbosity = 0; - res->timestamp_file = nullptr; - res->name_pattern = nullptr; - - res->size = std::numeric_limits::max(); - res->queue_length = 100000; - res->max_entries_per_iter = 1000; - char *firstarg = nullptr; - - // when we find special args, we process them - // but we need to replace them with 0 so that getopt will ignore them - // and getopt will continue to process beyond them - for (auto i = 1; i < argc - 1; i++) { - if (strcmp(argv[i], "-newer") == 0) { - res->timestamp_file = strdup(argv[i + 1]); - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (strcmp(argv[i], "-size") == 0) { - char *str = argv[i + 1]; - char extension = str[strlen(str) - 1]; - str[strlen(str) - 1] = 0; - res->size = atoll(str); - switch (extension) { - case 'c': - break; - default: - pfind_abort("Unsupported exension for -size\n"); - } - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (strcmp(argv[i], "-name") == 0) { - res->name_pattern = (char *)malloc(strlen(argv[i + 1]) * 4 + 100); - // transform a traditional name pattern to a regex: - char *str = argv[i + 1]; - char *out = res->name_pattern; - auto pos = 0; - for (long unsigned int i = 0; i < strlen(str); i++) { - if (str[i] == '*') { - pos += sprintf(out + pos, ".*"); - } else if (str[i] == '.') { - pos += sprintf(out + pos, "[.]"); - } else if (str[i] == '"' || str[i] == '\"') { - // erase the " - } else { - out[pos] = str[i]; - pos++; +pfind_options_t* +pfind_parse_args(int argc, char** argv, int force_print_help) { + + pfind_rank = 0; + pfind_size = 1; + + pfind_options_t* res = (pfind_options_t*) malloc(sizeof(pfind_options_t)); + // Init Values + res->just_count = 0; + res->print_by_process = 0; + res->stonewall_timer = 0; 
+ res->print_rates = 0; + res->name_regex = {}; + res->num_servers = 0; + res->mountdir = nullptr; + res->queue_length = 0; + res->max_entries_per_iter = 0; + res->steal_from_next = 0; + res->parallel_single_dir_access = 0; + + auto print_help = force_print_help; + res->workdir = nullptr; + res->results_dir = nullptr; + res->verbosity = 0; + res->timestamp_file = nullptr; + res->name_pattern = nullptr; + + res->size = std::numeric_limits::max(); + res->queue_length = 100000; + res->max_entries_per_iter = 1000; + char* firstarg = nullptr; + + // when we find special args, we process them + // but we need to replace them with 0 so that getopt will ignore them + // and getopt will continue to process beyond them + for(auto i = 1; i < argc - 1; i++) { + if(strcmp(argv[i], "-newer") == 0) { + res->timestamp_file = strdup(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-size") == 0) { + char* str = argv[i + 1]; + char extension = str[strlen(str) - 1]; + str[strlen(str) - 1] = 0; + res->size = atoll(str); + switch(extension) { + case 'c': + break; + default: + pfind_abort("Unsupported exension for -size\n"); + } + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-name") == 0) { + res->name_pattern = (char*) malloc(strlen(argv[i + 1]) * 4 + 100); + // transform a traditional name pattern to a regex: + char* str = argv[i + 1]; + char* out = res->name_pattern; + auto pos = 0; + for(long unsigned int i = 0; i < strlen(str); i++) { + if(str[i] == '*') { + pos += sprintf(out + pos, ".*"); + } else if(str[i] == '.') { + pos += sprintf(out + pos, "[.]"); + } else if(str[i] == '"' || str[i] == '\"') { + // erase the " + } else { + out[pos] = str[i]; + pos++; + } + } + out[pos] = 0; + + int ret = regcomp(&res->name_regex, res->name_pattern, 0); + if(ret) { + pfind_abort("Invalid regex for name given\n"); + } + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-regex") == 0) { + res->name_pattern = strdup(argv[i + 1]); 
+ int ret = regcomp(&res->name_regex, res->name_pattern, 0); + if(ret) { + pfind_abort("Invalid regex for name given\n"); + } + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-M") == 0) { + res->mountdir = strdup(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(strcmp(argv[i], "-S") == 0) { + res->num_servers = atoi(argv[i + 1]); + argv[i][0] = 0; + argv[++i][0] = 0; + } else if(!firstarg) { + firstarg = strdup(argv[i]); + argv[i][0] = 0; } - } - out[pos] = 0; - - int ret = regcomp(&res->name_regex, res->name_pattern, 0); - if (ret) { - pfind_abort("Invalid regex for name given\n"); - } - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (strcmp(argv[i], "-regex") == 0) { - res->name_pattern = strdup(argv[i + 1]); - int ret = regcomp(&res->name_regex, res->name_pattern, 0); - if (ret) { - pfind_abort("Invalid regex for name given\n"); - } - argv[i][0] = 0; - argv[++i][0] = 0; - } else if(strcmp(argv[i], "-M") == 0) { - res->mountdir = strdup(argv[i + 1]); - argv[i][0] = 0; - argv[++i][0] = 0; - } else if(strcmp(argv[i], "-S") == 0) { - res->num_servers = atoi(argv[i + 1]); - argv[i][0] = 0; - argv[++i][0] = 0; - } else if (!firstarg) { - firstarg = strdup(argv[i]); - argv[i][0] = 0; } - } - if (argc == 2) { - firstarg = strdup(argv[1]); - } - - int c; - while ((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) { - if (c == -1) { - break; + if(argc == 2) { + firstarg = strdup(argv[1]); } - switch (c) { - case 'H': - res->parallel_single_dir_access = atoi(optarg); - break; - case 'N': - res->steal_from_next = 1; - break; - case 'x': - /* ignore fake arg that we added when we processed the extra args */ - break; - case 'P': - res->print_by_process = 1; - break; - case 'C': - res->just_count = 1; - break; - case 'D': - if (strcmp(optarg, "rates") == 0) { - res->print_rates = 1; - } else { - pfind_abort("Unsupported debug flag\n"); - } - break; - case 'h': - print_help = 1; - break; - case 'r': - res->results_dir = strdup(optarg); - 
break; - case 'q': - res->queue_length = atoi(optarg); - break; - if (res->queue_length < 10) { - pfind_abort("Queue must be at least 10 elements!\n"); - } - break; - case 's': - res->stonewall_timer = atol(optarg); - break; - // case 'S': - // res->num_servers = atoi(optarg); - // break; - // case 'M': - // res->mountdir = strdup(optarg); - // break; - case 'v': - res->verbosity++; - break; - case 0: - break; + int c; + while((c = getopt(argc, argv, "CPs:r:vhD:xq:H:NM:S:")) != -1) { + if(c == -1) { + break; + } + + switch(c) { + case 'H': + res->parallel_single_dir_access = atoi(optarg); + break; + case 'N': + res->steal_from_next = 1; + break; + case 'x': + /* ignore fake arg that we added when we processed the extra + * args */ + break; + case 'P': + res->print_by_process = 1; + break; + case 'C': + res->just_count = 1; + break; + case 'D': + if(strcmp(optarg, "rates") == 0) { + res->print_rates = 1; + } else { + pfind_abort("Unsupported debug flag\n"); + } + break; + case 'h': + print_help = 1; + break; + case 'r': + res->results_dir = strdup(optarg); + break; + case 'q': + res->queue_length = atoi(optarg); + break; + if(res->queue_length < 10) { + pfind_abort("Queue must be at least 10 elements!\n"); + } + break; + case 's': + res->stonewall_timer = atol(optarg); + break; + case 'v': + res->verbosity++; + break; + case 0: + break; + } + } + if(res->verbosity > 2 && pfind_rank == 0) { + printf("Regex: %s\n", res->name_pattern); } - } - if (res->verbosity > 2 && pfind_rank == 0) { - printf("Regex: %s\n", res->name_pattern); - } - - if (print_help) { - if (pfind_rank == 0) - pfind_print_help(res); - exit(0); - } - - if (!firstarg) { - pfind_abort("Error: pfind \n"); - } - res->workdir = firstarg; - - return res; + + if(print_help) { + if(pfind_rank == 0) + pfind_print_help(res); + exit(0); + } + + if(!firstarg) { + pfind_abort("Error: pfind \n"); + } + res->workdir = firstarg; + + return res; } /* Client Processing a path. 
@@ -309,108 +307,112 @@ pfind_options_t *pfind_parse_args(int argc, char **argv, int force_print_help){ * server, which is enough for most cases * */ -void dirProcess(const string path, unsigned long long &checked, - unsigned long long &found, queue &dirs, - unsigned int world_rank, unsigned int world_size, - pfind_options_t *opt) { - struct dirent_extended *getdir = (struct dirent_extended *)malloc( - (sizeof(struct dirent_extended) + 255) * 1024 * 100); - memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100); - // cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path << - // endl; - - for (auto server = 0; server < opt->num_servers; server++) { - unsigned long long total_size = 0; - long unsigned int n = gkfs_getsingleserverdir( - path.c_str(), getdir, - (sizeof(struct dirent_extended) + 255) * 1024 * 100, server); - struct dirent_extended *temp = getdir; - - while (total_size < n) { - if (strlen(temp->d_name) == 0) - break; - total_size += temp->d_reclen; - /* Queue directory to process */ - if (temp->d_type == 1) { - string slash; - if (path[path.size() - 1] != '/') - slash = "/"; - checked++; - dirs.push(path + slash + temp->d_name); - temp = - reinterpret_cast(reinterpret_cast(temp) + temp->d_reclen); - continue; - } - /* Find filtering */ - auto timeOK = true; - if (opt->timestamp_file) { - if ((uint64_t)temp->ctime < runtime.ctime_min) - timeOK = false; - } - if (timeOK and (temp->size == opt->size or opt->size == std::numeric_limits::max())) - if (!(opt->name_pattern && - regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0))) - found++; - checked++; - temp = reinterpret_cast(reinterpret_cast(temp) + temp->d_reclen); +void +dirProcess(const string path, unsigned long long& checked, + unsigned long long& found, queue& dirs, + unsigned int world_rank, unsigned int world_size, + pfind_options_t* opt) { + struct dirent_extended* getdir = (struct dirent_extended*) malloc( + (sizeof(struct dirent_extended) + 255) * 1024 * 
100); + memset(getdir, 0, (sizeof(struct dirent_extended) + 255) * 1024 * 100); + // cout << "PROCESSING " << world_rank << "/"<< world_size << " = " << path + // << endl; + + for(auto server = 0; server < opt->num_servers; server++) { + unsigned long long total_size = 0; + long unsigned int n = gkfs_getsingleserverdir( + path.c_str(), getdir, + (sizeof(struct dirent_extended) + 255) * 1024 * 100, server); + struct dirent_extended* temp = getdir; + + while(total_size < n) { + if(strlen(temp->d_name) == 0) + break; + total_size += temp->d_reclen; + /* Queue directory to process */ + if(temp->d_type == 1) { + string slash; + if(path[path.size() - 1] != '/') + slash = "/"; + checked++; + dirs.push(path + slash + temp->d_name); + temp = reinterpret_cast( + reinterpret_cast(temp) + temp->d_reclen); + continue; + } + /* Find filtering */ + auto timeOK = true; + if(opt->timestamp_file) { + if((uint64_t) temp->ctime < runtime.ctime_min) + timeOK = false; + } + if(timeOK and (temp->size == opt->size or + opt->size == std::numeric_limits::max())) + if(!(opt->name_pattern && + regexec(&opt->name_regex, temp->d_name, 0, nullptr, 0))) + found++; + checked++; + temp = reinterpret_cast( + reinterpret_cast(temp) + temp->d_reclen); + } } - } } -int process(pfind_options_t *opt) { - // Print off a hello world message - unsigned long long found,checked; - // INIT PFIND - found = 0; - checked = 0; - memset(&runtime, 0, sizeof(pfind_runtime_options_t)); - - /* Get timestamp file */ - if (opt->timestamp_file) { - if (pfind_rank == 0) { - static struct stat timer_file{}; - if (lstat(opt->timestamp_file, &timer_file) != 0) { - printf("Could not open: \"%s\", error: %s", opt->timestamp_file, - strerror(errno)); - pfind_abort("\n"); - } - runtime.ctime_min = timer_file.st_ctime; +int +process(pfind_options_t* opt) { + // Print off a hello world message + unsigned long long found, checked; + // INIT PFIND + found = 0; + checked = 0; + memset(&runtime, 0, sizeof(pfind_runtime_options_t)); + + 
/* Get timestamp file */ + if(opt->timestamp_file) { + if(pfind_rank == 0) { + static struct stat timer_file{}; + if(lstat(opt->timestamp_file, &timer_file) != 0) { + printf("Could not open: \"%s\", error: %s", opt->timestamp_file, + strerror(errno)); + pfind_abort("\n"); + } + runtime.ctime_min = timer_file.st_ctime; + } } - } - queue dirs; - string workdir = opt->workdir; - workdir = workdir.substr(strlen(opt->mountdir), workdir.size()); - if (workdir.size() == 0) - workdir = "/"; - dirs.push(workdir); + queue dirs; + string workdir = opt->workdir; + workdir = workdir.substr(strlen(opt->mountdir), workdir.size()); + if(workdir.size() == 0) + workdir = "/"; + dirs.push(workdir); - do { - string processpath = dirs.front(); - dirs.pop(); + do { + string processpath = dirs.front(); + dirs.pop(); - dirProcess(processpath, checked, found, dirs, 0, 1, opt); - // cout << "NO more paths " << dirs.size() << endl; - } while (!dirs.empty()); + dirProcess(processpath, checked, found, dirs, 0, 1, opt); + // cout << "NO more paths " << dirs.size() << endl; + } while(!dirs.empty()); cout << "MATCHED " << found << "/" << checked << endl; - return 0; + return 0; } -int main(int argc, char **argv) { +int +main(int argc, char** argv) { - for (auto i = 0; i < argc; i++) { - if (strcmp(argv[i], "--help") == 0) { - argv[i][0] = 0; - pfind_rank = 0; - pfind_parse_args(argc, argv, 1); - exit(0); + for(auto i = 0; i < argc; i++) { + if(strcmp(argv[i], "--help") == 0) { + argv[i][0] = 0; + pfind_rank = 0; + pfind_parse_args(argc, argv, 1); + exit(0); + } } - } - - opt = pfind_parse_args(argc, argv, 0); - process(opt); + opt = pfind_parse_args(argc, argv, 0); + process(opt); } -- GitLab From e28714b040f79fe8137730af8cdfefbfd8d1e2bd Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Mon, 29 Apr 2024 21:26:47 +0000 Subject: [PATCH 15/24] gkfs script slurm nodelist support, bugfix, cpu affinity add better cpu affinity option via config file --- scripts/run/gkfs | 78 ++++++++++++++++++++----------- 
scripts/run/gkfs.conf | 11 +++-- scripts/run/gkfs_io500_proxy.conf | 24 ++++++---- 3 files changed, 74 insertions(+), 39 deletions(-) diff --git a/scripts/run/gkfs b/scripts/run/gkfs index 503e6db45..d94e1f6c5 100755 --- a/scripts/run/gkfs +++ b/scripts/run/gkfs @@ -91,8 +91,8 @@ create_pid_file() { # PROXY_ARGS_ # CPUS_PER_TASK # VERBOSE -# DAEMON_NUMACTL_ -# PROXY_NUMACTL_ +# DAEMON_AFFINITY_ +# PROXY_AFFINITY_ # USE_PROXY # DAEMON_CPUNODEBIND # DAEMON_MEMBIND @@ -104,17 +104,41 @@ create_pid_file() { ####################################### start_daemon() { local node_list - local srun_cmd + local srun_daemon_cmd + local srun_proxy_cmd local daemon_execute local proxy_execute # setup if [[ ${USE_SRUN} == true ]]; then - node_list=$(scontrol show job "${SLURM_JOB_ID}" | grep " NodeList=" | cut -d "=" -f2) - if [[ -z ${NODE_NUM} ]]; then - NODE_NUM=$(scontrol show hostname "${node_list}" | wc -l) + # check for daemon first + if [[ -n ${DAEMON_NODELIST_} ]]; then + if [[ ! -f ${DAEMON_NODELIST_} ]]; then + echo -e "${C_AST_RED}ERROR: Daemon nodelist file not found at ${DAEMON_NODELIST_}. Exiting ..." + exit 1 + fi + NODE_NUM=$(wc -l < "${DAEMON_NODELIST_}") + srun_daemon_cmd="srun --disable-status --nodelist=${DAEMON_NODELIST_} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} " + else + node_list=$(scontrol show job "${SLURM_JOB_ID}" | grep " NodeList=" | cut -d "=" -f2) + if [[ -z ${NODE_NUM} ]]; then + NODE_NUM=$(scontrol show hostname "${node_list}" | wc -l) + fi + # Setting up base srun cmd + srun_daemon_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} " + fi + if [[ ${USE_PROXY} == true ]]; then + if [[ -n ${PROXY_NODELIST_} ]]; then + if [[ ! -f ${PROXY_NODELIST_} ]]; then + echo -e "${C_AST_RED}ERROR: Proxy nodelist file not found at ${PROXY_NODELIST_}. Exiting ..." 
+ exit 1 + fi + NODE_NUM_PROXY=$(wc -l < "${PROXY_NODELIST_}") + srun_proxy_cmd="srun --disable-status --nodelist=${PROXY_NODELIST_} --ntasks=${NODE_NUM_PROXY} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} " + else + srun_proxy_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} " + NODE_NUM_PROXY=$NODE_NUM + fi fi - # Setting up base srun cmd - srun_cmd="srun --disable-status -N ${NODE_NUM} --ntasks=${NODE_NUM} --cpus-per-task=${CPUS_PER_TASK} ${SRUN_ARGS} " else NODE_NUM=1 fi @@ -136,22 +160,22 @@ start_daemon() { if [[ ${USE_PROXY} == true ]]; then daemon_cmd="${daemon_cmd} ${DAEMON_PROXY_ARGS}" fi - # Setting up numactl - if [[ ${DAEMON_NUMACTL_} == true ]]; then - daemon_cmd="numactl --cpunodebind=${DAEMON_CPUNODEBIND} --membind=${DAEMON_MEMBIND} ${daemon_cmd}" + # Set cpu affinity for daemon + if [[ -n ${DAEMON_AFFINITY_} ]]; then + daemon_cmd="${DAEMON_AFFINITY_} ${daemon_cmd}" fi # final daemon execute command - daemon_execute="${srun_cmd}${daemon_cmd}" + daemon_execute="${srun_daemon_cmd} ${SRUN_DAEMON_ARGS} ${daemon_cmd}" # Setting up base proxy command if [[ ${USE_PROXY} == true ]]; then local proxy_cmd="${PROXY_BIN} -H ${HOSTSFILE} --pid-path ${PROXY_LOCAL_PID_FILE} ${PROXY_ARGS_}" - # Setting up numactl - if [[ ${PROXY_NUMACTL_} == true ]]; then - proxy_cmd="numactl --cpunodebind=${PROXY_CPUNODEBIND} --membind=${PROXY_MEMBIND} ${proxy_cmd}" + # Set cpu affinity for proxy + if [[ -n ${PROXY_AFFINITY_} ]]; then + proxy_cmd="${PROXY_AFFINITY_} ${proxy_cmd}" fi # final proxy execute command - proxy_execute="${srun_cmd}${proxy_cmd}" + proxy_execute="${srun_proxy_cmd} ${SRUN_PROXY_ARGS} ${proxy_cmd}" fi if [[ ${VERBOSE} == true ]]; then @@ -177,7 +201,7 @@ start_daemon() { echo -e "${C_AST_GREEN}Startup time: ${elapsed} seconds" if [[ ${USE_PROXY} == true ]]; then - echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM} nodes) ..." 
+ echo -e "${C_AST_GREEN}Starting GekkoFS proxies (${NODE_NUM_PROXY} nodes) ..." start_time="$(date -u +%s.%3N)" ${proxy_execute} & local proxy_pid=$! @@ -293,7 +317,7 @@ stop_daemons() { usage_short() { echo " usage: gkfs [-h/--help] [-r/--rootdir ] [-m/--mountdir ] [-a/--args ] [--proxy ] [-f/--foreground ] - [--srun ] [-n/--numnodes ] [--cpuspertask <64>] [--daemon_numactl ] [--proxy_numactl ] [-v/--verbose ] + [--srun ] [-n/--numnodes ] [--cpuspertask <64>] [-v/--verbose ] {start,stop} " } @@ -325,8 +349,6 @@ help_msg() { -n, --numnodes GekkoFS daemons are started on n nodes. Nodelist is extracted from Slurm via the SLURM_JOB_ID env variable. --cpuspertask <#cores> Set the number of cores the daemons can use. Must use '--srun'. - --daemon_numactl Use numactl for the daemon. Modify gkfs.conf for further numactl configurations. - --proxy_numactl Use numactl for the proxy. Modify gkfs.conf for further numactl configurations. -c, --config Path to configuration file. By defaults looks for a 'gkfs.conf' in this directory. 
-v, --verbose Increase verbosity " @@ -365,9 +387,11 @@ CPUS_PER_TASK=$(grep -c ^processor /proc/cpuinfo) DAEMON_ARGS_=${DAEMON_ARGS} PROXY_ARGS_=${PROXY_ARGS} USE_SRUN=${USE_SRUN} +DAEMON_NODELIST_=${DAEMON_NODELIST} +PROXY_NODELIST_=${PROXY_NODELIST} RUN_FOREGROUND=false -DAEMON_NUMACTL_=${DAEMON_NUMACTL} -PROXY_NUMACTL_=${PROXY_NUMACTL} +DAEMON_AFFINITY_=${DAEMON_AFFINITY} +PROXY_AFFINITY_=${PROXY_AFFINITY} USE_PROXY=${USE_PROXY} # parse input POSITIONAL=() @@ -412,13 +436,15 @@ while [[ $# -gt 0 ]]; do RUN_FOREGROUND=true shift # past argument ;; - --daemon_numactl) - DAEMON_NUMACTL_=true + --daemon_nodelist) + DAEMON_NODELIST_="$2" shift # past argument + shift # past value ;; - --proxy_numactl) - PROXY_NUMACTL_=true + --proxy_nodelist) + PROXY_NODELIST_="$2" shift # past argument + shift # past value ;; --cpuspertask) CPUS_PER_TASK=$2 diff --git a/scripts/run/gkfs.conf b/scripts/run/gkfs.conf index bd74219a9..ba62afddd 100644 --- a/scripts/run/gkfs.conf +++ b/scripts/run/gkfs.conf @@ -16,11 +16,14 @@ DAEMON_PID_FILE=./gkfs_daemon.pid DAEMON_ARGS="" # Use Slurm's srun to start the daemons on multiple nodes and set specific srun args USE_SRUN=false +# path to hostfile for srun for daemon +DAEMON_NODELIST="" +# srun args SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0" -# use numactl to pin daemon to socket -DAEMON_NUMACTL=false -DAEMON_CPUNODEBIND="1" -DAEMON_MEMBIND="1" +# Specific srun args for daemon +SRUN_DAEMON_ARGS="" +# use cpu affinity. 
Set this eg to `taskset -c ...` +DAEMON_AFFINITY="" # logging GKFS_DAEMON_LOG_LEVEL=info diff --git a/scripts/run/gkfs_io500_proxy.conf b/scripts/run/gkfs_io500_proxy.conf index 2b636463b..7855c06a6 100644 --- a/scripts/run/gkfs_io500_proxy.conf +++ b/scripts/run/gkfs_io500_proxy.conf @@ -9,31 +9,37 @@ PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/io500/bin/gkfs_proxy LIBGKFS_HOSTS_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_hostfile ## daemon configuration -#DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir -DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir +DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir +#DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir # additional daemon arguments (see `gkfs_daemon -h`) # use numactl to pin daemon to socket DAEMON_ARGS="-l ib0 -c" -DAEMON_NUMACTL=true -DAEMON_CPUNODEBIND="1" -DAEMON_MEMBIND="1" +# use cpu affinity. Set this eg to `taskset -c ...` +DAEMON_AFFINITY="" ## proxy configuration DAEMON_PROXY_ARGS="--proxy-listen ib0 --proxy-protocol ofi+sockets" PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid PROXY_ARGS="" -PROXY_NUMACTL=true -PROXY_CPUNODEBIND="0" -PROXY_MEMBIND="0" +# use cpu affinity. 
Set this eg to `taskset -c ...` +PROXY_AFFINITY="" ## slurm configuration # Use Slurm's srun to start the daemons on multiple nodes and set specific srun args USE_SRUN=true +# path to hostfile for srun for daemon and proxy +DAEMON_NODELIST="" +PROXY_NODELIST="" +# srun args SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0" +# Specific srun args for daemon +SRUN_DAEMON_ARGS="" +# Specific srun args for proxy +SRUN_PROXY_ARGS="" # path to daemon pid file; created where the script is run SRUN_DAEMON_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_daemon.pid -SRUN_PROXY_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_proxy.pid # TODO +SRUN_PROXY_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_proxy.pid # logging configuration GKFS_DAEMON_LOG_LEVEL=info -- GitLab From 7a10c3525c7a603b5b72193be09eb636bd004727 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Tue, 30 Apr 2024 16:56:35 +0200 Subject: [PATCH 16/24] Improve log output for client RPCs --- src/client/rpc/forward_metadata_proxy.cpp | 28 ++++++++++++++--------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index 6198d6579..0324e452d 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -28,7 +28,7 @@ forward_create_proxy(const std::string& path, const mode_t mode) { auto endp = CTX->proxy_host(); try { - LOG(DEBUG, "Sending RPC ..."); + LOG(DEBUG, "{}() Sending RPC for path '{}'...", __func__, path); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we // can retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -42,7 +42,8 @@ forward_create_proxy(const std::string& path, const mode_t mode) { return out.err() ? 
out.err() : 0; } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); + LOG(ERROR, "{}() getting rpc output for path '{}' failed", __func__, + path); return EBUSY; } } @@ -53,7 +54,7 @@ forward_stat_proxy(const std::string& path, string& attr) { auto endp = CTX->proxy_host(); try { - LOG(DEBUG, "Sending RPC ..."); + LOG(DEBUG, "{}() Sending RPC for path '{}'...", __func__, path); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we // can retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -70,7 +71,8 @@ forward_stat_proxy(const std::string& path, string& attr) { attr = out.db_val(); return 0; } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); + LOG(ERROR, "{}() getting rpc output for path '{}' failed", __func__, + path); return EBUSY; } } @@ -80,7 +82,7 @@ forward_remove_proxy(const std::string& path) { auto endp = CTX->proxy_host(); try { - LOG(DEBUG, "Sending RPC ..."); + LOG(DEBUG, "{}() Sending RPC for path '{}'...", __func__, path); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we // can retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -93,7 +95,8 @@ forward_remove_proxy(const std::string& path) { return out.err() ? 
out.err() : 0; } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); + LOG(ERROR, "{}() getting rpc output for path '{}' failed", __func__, + path); return EBUSY; } } @@ -103,7 +106,7 @@ forward_decr_size_proxy(const std::string& path, size_t length) { auto endp = CTX->proxy_host(); try { - LOG(DEBUG, "Sending RPC ..."); + LOG(DEBUG, "{}() Sending RPC for path '{}'...", __func__, path); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we // can retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -118,7 +121,8 @@ forward_decr_size_proxy(const std::string& path, size_t length) { return out.err() ? out.err() : 0; } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); + LOG(ERROR, "{}() getting rpc output for path '{}' failed", __func__, + path); return EBUSY; } } @@ -150,7 +154,8 @@ forward_update_metadentry_size_proxy(const string& path, const size_t size, else return make_pair(0, out.ret_size()); } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); + LOG(ERROR, "{}() getting rpc output for path '{}' failed", __func__, + path); return make_pair(EBUSY, 0); } } @@ -160,7 +165,7 @@ forward_get_metadentry_size_proxy(const std::string& path) { auto endp = CTX->proxy_host(); try { - LOG(DEBUG, "Sending RPC ..."); + LOG(DEBUG, "{}() Sending RPC for path '{}'...", __func__, path); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we // can retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -179,7 +184,8 @@ forward_get_metadentry_size_proxy(const std::string& path) { else return make_pair(0, out.ret_size()); } catch(const std::exception& ex) { - LOG(ERROR, "while getting rpc output"); + LOG(ERROR, "{}() getting rpc output for path '{}' failed", __func__, + path); return make_pair(EBUSY, 0); } } -- GitLab From 
4330fdb5cbf7ab8aa4cdc59ba0e02b85118dfb1e Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Wed, 1 May 2024 12:41:52 +0200 Subject: [PATCH 17/24] Avoid race condition and segfault during read/write --- src/client/gkfs_functions.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 0495801bf..85455f002 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -1044,10 +1044,12 @@ gkfs_pwrite(int fd, const void* buf, size_t count, off64_t offset) { */ ssize_t gkfs_write(int fd, const void* buf, size_t count) { - auto gkfs_file = CTX->file_map()->get(fd); + auto gkfs_fd = CTX->file_map()->get(fd); + if(!gkfs_fd) + return 0; // call pwrite and update pos - auto ret = gkfs_write_ws(*gkfs_file, reinterpret_cast(buf), - count, gkfs_file->pos(), true); + auto ret = gkfs_write_ws(*gkfs_fd, reinterpret_cast(buf), + count, gkfs_fd->pos(), true); return ret; } @@ -1216,6 +1218,8 @@ gkfs_pread(int fd, void* buf, size_t count, off64_t offset) { ssize_t gkfs_read(int fd, void* buf, size_t count) { auto gkfs_fd = CTX->file_map()->get(fd); + if(!gkfs_fd) + return 0; auto pos = gkfs_fd->pos(); // retrieve the current offset auto ret = gkfs_read_ws(*gkfs_fd, reinterpret_cast(buf), count, pos); // Update offset in file descriptor in the file map -- GitLab From c73e68ebc29d5ca4906d70791aa89f7d183f6bc3 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 3 May 2024 13:15:28 +0200 Subject: [PATCH 18/24] Avoid nullptr in gkfsfunctions --- src/client/gkfs_functions.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index 85455f002..cfa0769ce 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -1030,6 +1030,8 @@ gkfs_write_ws(gkfs::filemap::OpenFile& file, const char* buf, size_t count, ssize_t gkfs_pwrite(int fd, const void* buf, size_t count, off64_t offset) { auto file = 
CTX->file_map()->get(fd); + if(!file) + return 0; return gkfs_write_ws(*file, reinterpret_cast(buf), count, offset); } @@ -1066,6 +1068,8 @@ ssize_t gkfs_pwritev(int fd, const struct iovec* iov, int iovcnt, off_t offset) { auto file = CTX->file_map()->get(fd); + if(!file) + return 0; auto pos = offset; // keep track of current position ssize_t written = 0; ssize_t ret; @@ -1105,6 +1109,8 @@ ssize_t gkfs_writev(int fd, const struct iovec* iov, int iovcnt) { auto gkfs_fd = CTX->file_map()->get(fd); + if(!gkfs_fd) + return 0; auto pos = gkfs_fd->pos(); // retrieve the current offset auto ret = gkfs_pwritev(fd, iov, iovcnt, pos); assert(ret != 0); @@ -1204,6 +1210,8 @@ gkfs_read_ws(const gkfs::filemap::OpenFile& file, char* buf, size_t count, ssize_t gkfs_pread(int fd, void* buf, size_t count, off64_t offset) { auto gkfs_fd = CTX->file_map()->get(fd); + if(!gkfs_fd) + return 0; return gkfs_read_ws(*gkfs_fd, reinterpret_cast(buf), count, offset); } @@ -1242,6 +1250,8 @@ ssize_t gkfs_preadv(int fd, const struct iovec* iov, int iovcnt, off_t offset) { auto file = CTX->file_map()->get(fd); + if(!file) + return 0; auto pos = offset; // keep track of current position ssize_t read = 0; ssize_t ret; @@ -1281,6 +1291,8 @@ ssize_t gkfs_readv(int fd, const struct iovec* iov, int iovcnt) { auto gkfs_fd = CTX->file_map()->get(fd); + if(!gkfs_fd) + return 0; auto pos = gkfs_fd->pos(); // retrieve the current offset auto ret = gkfs_preadv(fd, iov, iovcnt, pos); assert(ret != 0); -- GitLab From 59065b2e90f5f1ac56c00ec622c78e0385aa2494 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 27 Jun 2024 16:52:04 +0200 Subject: [PATCH 19/24] Allow to set separate dirbuffsize in proxy --- include/config.hpp | 1 + src/client/rpc/forward_metadata_proxy.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/config.hpp b/include/config.hpp index 49b71be38..c49ee873f 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -137,6 +137,7 @@ namespace rpc { constexpr 
auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB) // size of preallocated buffer to hold directory entries in rpc call constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega +constexpr auto dirents_buff_size_proxy = (128 * 1024 * 1024); // 8 mega /* * Indicates the number of concurrent progress to drive I/O operations of chunk * files to and from local file systems The value is directly mapped to created diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index 0324e452d..bbd8b8dfe 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -203,11 +203,12 @@ forward_get_dirents_single_proxy(const string& path, int server) { * buffer. Moreover we don't need a zeroed buffer here. */ auto large_buffer = std::unique_ptr( - new char[gkfs::config::rpc::dirents_buff_size]); + new char[gkfs::config::rpc::dirents_buff_size_proxy]); // We use the full size per server... - const std::size_t per_host_buff_size = gkfs::config::rpc::dirents_buff_size; - vector> output; + const std::size_t per_host_buff_size = gkfs::config::rpc::dirents_buff_size_proxy; + auto output_ptr = make_unique< + vector>>(); // expose local buffers for RMA from servers std::vector exposed_buffers; -- GitLab From d6724f78bee39c1ee606216f7166db78735beab8 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Fri, 14 Jun 2024 12:47:22 +0200 Subject: [PATCH 20/24] Finalizing gkfs script and configs --- scripts/run/gkfs | 18 +++++++++++ scripts/run/gkfs.conf | 49 +++++++++++++++++++---------- scripts/run/gkfs_io500.conf | 51 +++++++++++++++++++++++++++++++ scripts/run/gkfs_io500_proxy.conf | 50 ------------------------------ 4 files changed, 102 insertions(+), 66 deletions(-) create mode 100644 scripts/run/gkfs_io500.conf delete mode 100644 scripts/run/gkfs_io500_proxy.conf diff --git a/scripts/run/gkfs b/scripts/run/gkfs index d94e1f6c5..502f3bc3b 100755 --- a/scripts/run/gkfs +++ b/scripts/run/gkfs @@ 
-141,9 +141,18 @@ start_daemon() { fi else NODE_NUM=1 + if [[ ${USE_PROXY} == true ]]; then + NODE_NUM_PROXY=$NODE_NUM + fi fi if [[ ${VERBOSE} == true ]]; then + echo -e "${C_AST_GREEN}-------------------CLIENT SETTINGS------------------------" + echo -e "${C_AST_GREEN}Set env variables for client: " + echo -e "LD_PRELOAD=${PRELOAD_LIB}" + echo -e "LIBGKFS_HOSTS_FILE=${HOSTSFILE}" + echo -e "LIBGKFS_PROXY_PID_FILE=${PROXY_LOCAL_PID_FILE}" + echo -e "${C_AST_GREEN}----------------------------------------------------------" echo -e "${C_AST_GREEN}mountdir: ${MOUNTDIR}" echo -e "${C_AST_GREEN}rootdir: ${ROOTDIR}" echo -e "${C_AST_GREEN}node_num: ${NODE_NUM}" @@ -393,6 +402,15 @@ RUN_FOREGROUND=false DAEMON_AFFINITY_=${DAEMON_AFFINITY} PROXY_AFFINITY_=${PROXY_AFFINITY} USE_PROXY=${USE_PROXY} +# use absolute paths for everything +DAEMON_BIN=$(readlink -f ${DAEMON_BIN}) +PROXY_BIN=$(readlink -f ${PROXY_BIN}) +PRELOAD_LIB=$(readlink -f ${PRELOAD_LIB}) +HOSTSFILE=$(readlink -f ${HOSTSFILE}) +PROXY_LOCAL_PID_FILE=$(readlink -f ${PROXY_LOCAL_PID_FILE}) +SRUN_DAEMON_PID_FILE=$(readlink -f ${SRUN_DAEMON_PID_FILE}) +SRUN_PROXY_PID_FILE=$(readlink -f ${SRUN_PROXY_PID_FILE}) + # parse input POSITIONAL=() while [[ $# -gt 0 ]]; do diff --git a/scripts/run/gkfs.conf b/scripts/run/gkfs.conf index ba62afddd..fc7922ddb 100644 --- a/scripts/run/gkfs.conf +++ b/scripts/run/gkfs.conf @@ -3,30 +3,47 @@ # binaries (default for project_dir/build PRELOAD_LIB=../../build/src/client/libgkfs_intercept.so DAEMON_BIN=../../build/src/daemon/gkfs_daemon +PROXY_BIN=../../build/src/proxy/gkfs_proxy -# client configuration +# client configuration (needs to be set for all clients) LIBGKFS_HOSTS_FILE=./gkfs_hostfile -# daemon configuration -DAEMON_ROOTDIR=/dev/shm/gkfs_rootdir -DAEMON_MOUNTDIR=/dev/shm/gkfs_mountdir -# path to daemon pid file; created where the script is run -DAEMON_PID_FILE=./gkfs_daemon.pid +## daemon configuration +DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir 
+DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir # additional daemon arguments (see `gkfs_daemon -h`) -DAEMON_ARGS="" +# use numactl to pin daemon to socket +DAEMON_ARGS="-l lo -c" +# use cpu affinity. Set this eg to `taskset -c ...` +DAEMON_AFFINITY="" + +## proxy configuration +USE_PROXY=false +DAEMON_PROXY_ARGS="--proxy-listen lo --proxy-protocol ofi+sockets" +PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid +PROXY_ARGS="-p ofi+sockets" +# use cpu affinity. Set this eg to `taskset -c ...` +PROXY_AFFINITY="" + +## slurm configuration # Use Slurm's srun to start the daemons on multiple nodes and set specific srun args USE_SRUN=false -# path to hostfile for srun for daemon -DAEMON_NODELIST="" -# srun args -SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0" +DAEMON_NODELIST=./hostfile +PROXY_NODELIST=./hostfile +SRUN_ARGS="--overlap --ntasks-per-node=1 --overcommit --overlap --oversubscribe --mem=0" # Specific srun args for daemon SRUN_DAEMON_ARGS="" -# use cpu affinity. 
Set this eg to `taskset -c ...` -DAEMON_AFFINITY="" +# Specific srun args for proxy +SRUN_PROXY_ARGS="" +# path to daemon pid file; created where the script is run +SRUN_DAEMON_PID_FILE=./gkfs_daemon.pid +SRUN_PROXY_PID_FILE=./gkfs_proxy.pid # logging GKFS_DAEMON_LOG_LEVEL=info -GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log -LIBGKFS_LOG=errors,warnings -LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log +GKFS_DAEMON_LOG_PATH=/dev/shm/gkfs_daemon.log +GKFS_PROXY_LOG_LEVEL=info +GKFS_PROXY_LOG_PATH=/dev/shm/gkfs_proxy.log +# Modify the following for the client +# LIBGKFS_LOG=errors,warnings +# LIBGKFS_LOG_OUTPUT=/tmp/gkfs_client.log diff --git a/scripts/run/gkfs_io500.conf b/scripts/run/gkfs_io500.conf new file mode 100644 index 000000000..eaca98fde --- /dev/null +++ b/scripts/run/gkfs_io500.conf @@ -0,0 +1,51 @@ +#!/bin/bash + +# binaries (default for project_dir/build +PRELOAD_LIB=/lustre/project/nhr-admire/vef/gekkofs/build/src/client/libgkfs_intercept.so +DAEMON_BIN=/lustre/project/nhr-admire/vef/gekkofs/build/src/daemon/gkfs_daemon +PROXY_BIN=/lustre/project/nhr-admire/vef/gekkofs/build/src/proxy/gkfs_proxy + +## client configuration +LIBGKFS_HOSTS_FILE=/lustre/project/nhr-admire/vef/run/io500/gkfs_hostfile + +## daemon configuration +#DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir +DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir +#DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir +DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir +# additional daemon arguments (see `gkfs_daemon -h`) +# use numactl to pin daemon to socket +DAEMON_ARGS="-P ofi+verbs -l ib0 -c" +# use cpu affinity. Set this eg to `taskset -c ...` +DAEMON_AFFINITY="taskset -c 0-63" + +## proxy configuration +USE_PROXY=false +DAEMON_PROXY_ARGS="--proxy-listen ib0 --proxy-protocol ofi+verbs" +PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid +PROXY_ARGS="-p ofi+verbs" +# use cpu affinity. 
Set this eg to `taskset -c ...` +PROXY_AFFINITY="taskset -c 0-63" + +## slurm configuration +# Use Slurm's srun to start the daemons on multiple nodes and set specific srun args +USE_SRUN=true +DAEMON_NODELIST=/lustre/project/nhr-admire/vef/hostfile +PROXY_NODELIST=/lustre/project/nhr-admire/vef/hostfile +SRUN_ARGS="--overlap --ntasks-per-node=1 --overcommit --oversubscribe --mem=0" +# Specific srun args for daemon +SRUN_DAEMON_ARGS="" +# Specific srun args for proxy +SRUN_PROXY_ARGS="" +# path to daemon pid file; created where the script is run +SRUN_DAEMON_PID_FILE=/lustre/project/nhr-admire/vef/run/io500/gkfs_daemon.pid +SRUN_PROXY_PID_FILE=/lustre/project/nhr-admire/vef/run/io500/gkfs_proxy.pid + +# logging configuration +GKFS_DAEMON_LOG_LEVEL=info +GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log +GKFS_PROXY_LOG_LEVEL=info +GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log +# Modify the following for the client +# LIBGKFS_LOG=errors,warnings +# LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log \ No newline at end of file diff --git a/scripts/run/gkfs_io500_proxy.conf b/scripts/run/gkfs_io500_proxy.conf deleted file mode 100644 index 7855c06a6..000000000 --- a/scripts/run/gkfs_io500_proxy.conf +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -# binaries (default for project_dir/build -PRELOAD_LIB=/lustre/miifs01/project/m2_zdvresearch/vef/io500/lib/libgkfs_intercept.so -DAEMON_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/io500/bin/gkfs_daemon -PROXY_BIN=/lustre/miifs01/project/m2_zdvresearch/vef/io500/bin/gkfs_proxy - -## client configuration -LIBGKFS_HOSTS_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_hostfile - -## daemon configuration -DAEMON_ROOTDIR=/dev/shm/vef_gkfs_rootdir -#DAEMON_ROOTDIR=/localscratch/${SLURM_JOB_ID}/vef_gkfs_rootdir -DAEMON_MOUNTDIR=/dev/shm/vef_gkfs_mountdir -# additional daemon arguments (see `gkfs_daemon -h`) -# use numactl to pin daemon to socket -DAEMON_ARGS="-l ib0 -c" -# use cpu affinity. 
Set this eg to `taskset -c ...` -DAEMON_AFFINITY="" - -## proxy configuration -DAEMON_PROXY_ARGS="--proxy-listen ib0 --proxy-protocol ofi+sockets" -PROXY_LOCAL_PID_FILE=/dev/shm/vef_gkfs_proxy.pid -PROXY_ARGS="" -# use cpu affinity. Set this eg to `taskset -c ...` -PROXY_AFFINITY="" - -## slurm configuration -# Use Slurm's srun to start the daemons on multiple nodes and set specific srun args -USE_SRUN=true -# path to hostfile for srun for daemon and proxy -DAEMON_NODELIST="" -PROXY_NODELIST="" -# srun args -SRUN_ARGS="--ntasks-per-node=1 --overcommit --contiguous --oversubscribe --mem=0" -# Specific srun args for daemon -SRUN_DAEMON_ARGS="" -# Specific srun args for proxy -SRUN_PROXY_ARGS="" -# path to daemon pid file; created where the script is run -SRUN_DAEMON_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_daemon.pid -SRUN_PROXY_PID_FILE=/lustre/miifs01/project/m2_zdvresearch/vef/io500/run/gkfs_proxy.pid - -# logging configuration -GKFS_DAEMON_LOG_LEVEL=info -GKFS_DAEMON_LOG_PATH=/dev/shm/vef_gkfs_daemon.log -GKFS_PROXY_LOG_LEVEL=info -GKFS_PROXY_LOG_PATH=/dev/shm/vef_gkfs_proxy.log -LIBGKFS_LOG=errors,warnings -LIBGKFS_LOG_OUTPUT=/dev/shm/vef_gkfs_client.log \ No newline at end of file -- GitLab From f26b11b584899d97a2c95a72a569bb2cd2a3ea3b Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 27 Jun 2024 17:37:20 +0200 Subject: [PATCH 21/24] Finalizing proxy --- include/client/rpc/forward_metadata.hpp | 3 ++- include/client/rpc/forward_metadata_proxy.hpp | 3 ++- src/client/gkfs_functions.cpp | 11 ++++++----- src/client/rpc/forward_metadata.cpp | 12 +++++++----- src/client/rpc/forward_metadata_proxy.cpp | 9 +++++---- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/client/rpc/forward_metadata.hpp b/include/client/rpc/forward_metadata.hpp index 36797f875..ac3cd4512 100644 --- a/include/client/rpc/forward_metadata.hpp +++ b/include/client/rpc/forward_metadata.hpp @@ -84,7 +84,8 @@ forward_get_metadentry_size(const 
std::string& path, const int copy); std::pair> forward_get_dirents(const std::string& path); -std::pair>> +std::pair>>> forward_get_dirents_single(const std::string& path, int server); #ifdef HAS_SYMLINKS diff --git a/include/client/rpc/forward_metadata_proxy.hpp b/include/client/rpc/forward_metadata_proxy.hpp index 9d5e37ef0..6f44d10ea 100644 --- a/include/client/rpc/forward_metadata_proxy.hpp +++ b/include/client/rpc/forward_metadata_proxy.hpp @@ -36,7 +36,8 @@ forward_update_metadentry_size_proxy(const std::string& path, const size_t size, std::pair forward_get_metadentry_size_proxy(const std::string& path); -std::pair>> +std::pair>>> forward_get_dirents_single_proxy(const std::string& path, int server); } // namespace gkfs::rpc diff --git a/src/client/gkfs_functions.cpp b/src/client/gkfs_functions.cpp index cfa0769ce..90a1ca0ae 100644 --- a/src/client/gkfs_functions.cpp +++ b/src/client/gkfs_functions.cpp @@ -301,9 +301,9 @@ gkfs_create(const std::string& path, mode_t mode) { return -1; } - // if(check_parent_dir(path)) { - // return -1; - // } + if(check_parent_dir(path)) { + return -1; + } int err = 0; if(gkfs::config::proxy::fwd_create && CTX->use_proxy()) { // no replication support for proxy @@ -1662,7 +1662,8 @@ extern "C" int gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, unsigned int count, int server) { - pair, bool, size_t, time_t>>> + pair, bool, size_t, time_t>>>> ret{}; if(gkfs::config::proxy::fwd_get_dirents_single && CTX->use_proxy()) { ret = gkfs::rpc::forward_get_dirents_single_proxy(path, server); @@ -1676,7 +1677,7 @@ gkfs_getsingleserverdir(const char* path, struct dirent_extended* dirp, return -1; } - auto open_dir = ret.second; + auto& open_dir = *ret.second; unsigned int pos = 0; unsigned int written = 0; struct dirent_extended* current_dirp = nullptr; diff --git a/src/client/rpc/forward_metadata.cpp b/src/client/rpc/forward_metadata.cpp index 424f91d52..e569d873f 100644 --- a/src/client/rpc/forward_metadata.cpp 
+++ b/src/client/rpc/forward_metadata.cpp @@ -782,7 +782,7 @@ forward_get_dirents(const string& path) { * reusing the forward_get_dirents code. As we only need a server, we could * simplify the code removing the asynchronous part. */ -pair>> +pair>>> forward_get_dirents_single(const string& path, int server) { if(gkfs::config::proxy::fwd_get_dirents_single && CTX->use_proxy()) { @@ -805,7 +805,8 @@ forward_get_dirents_single(const string& path, int server) { // We use the full size per server... const std::size_t per_host_buff_size = gkfs::config::rpc::dirents_buff_size; - vector> output; + auto output_ptr = make_unique< + vector>>(); // expose local buffers for RMA from servers std::vector exposed_buffers; @@ -819,7 +820,7 @@ forward_get_dirents_single(const string& path, int server) { } catch(const std::exception& ex) { LOG(ERROR, "{}() Failed to expose buffers for RMA. err '{}'", __func__, ex.what()); - return make_pair(EBUSY, output); + return make_pair(EBUSY, std::move(output_ptr)); } auto err = 0; @@ -899,9 +900,10 @@ forward_get_dirents_single(const string& path, int server) { auto name = std::string(names_ptr); // number of characters in entry + \0 terminator names_ptr += name.size() + 1; - output.emplace_back(std::forward_as_tuple(name, ftype, size, ctime)); + output_ptr->emplace_back( + std::forward_as_tuple(name, ftype, size, ctime)); } - return make_pair(err, output); + return make_pair(err, std::move(output_ptr)); } diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index bbd8b8dfe..b6a39be34 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -190,7 +190,7 @@ forward_get_metadentry_size_proxy(const std::string& path) { } } -pair>> +pair>>> forward_get_dirents_single_proxy(const string& path, int server) { LOG(DEBUG, "{}() enter for path '{}'", __func__, path) @@ -221,7 +221,7 @@ forward_get_dirents_single_proxy(const string& path, int server) { } 
catch(const std::exception& ex) { LOG(ERROR, "{}() Failed to expose buffers for RMA. err '{}'", __func__, ex.what()); - return make_pair(EBUSY, output); + return make_pair(EBUSY, std::move(output_ptr)); } auto err = 0; @@ -301,9 +301,10 @@ forward_get_dirents_single_proxy(const string& path, int server) { auto name = std::string(names_ptr); // number of characters in entry + \0 terminator names_ptr += name.size() + 1; - output.emplace_back(std::forward_as_tuple(name, ftype, size, ctime)); + output_ptr->emplace_back( + std::forward_as_tuple(name, ftype, size, ctime)); } - return make_pair(err, output); + return make_pair(err, std::move(output_ptr)); } } // namespace rpc -- GitLab From 54ada931fda06cdf9a5629660ff3fa723c6e20de Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 27 Jun 2024 14:46:59 +0200 Subject: [PATCH 22/24] Update Hermes submodule to support multiple clients --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 402a7e5b3..a50dd9443 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 402a7e5b3d408a1e2cca0b9a6f3e4041513db861 +Subproject commit a50dd9443387dc287a5e588f209d6b445ea3d3af -- GitLab From 6cbc499928701f2bb5101530fe0d788659dcf78a Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 27 Jun 2024 18:43:17 +0200 Subject: [PATCH 23/24] Code format --- include/client/rpc/rpc_types.hpp | 33 ++++++++--------------- include/config.hpp | 2 +- src/client/hooks.cpp | 3 +-- src/client/rpc/forward_metadata_proxy.cpp | 3 ++- src/daemon/daemon.cpp | 9 ++++--- 5 files changed, 21 insertions(+), 29 deletions(-) diff --git a/include/client/rpc/rpc_types.hpp b/include/client/rpc/rpc_types.hpp index 738b99483..a259e46d1 100644 --- a/include/client/rpc/rpc_types.hpp +++ b/include/client/rpc/rpc_types.hpp @@ -2379,8 +2379,7 @@ struct write_data_proxy { : m_path(other.path), m_offset(other.offset), m_write_size(other.write_size), m_buffers(other.bulk_handle) {} - 
explicit - operator rpc_client_proxy_write_in_t() { + explicit operator rpc_client_proxy_write_in_t() { return {m_path.c_str(), m_offset, m_write_size, hg_bulk_t(m_buffers)}; } @@ -2519,8 +2518,7 @@ struct read_data_proxy { : m_path(other.path), m_offset(other.offset), m_read_size(other.read_size), m_buffers(other.bulk_handle) {} - explicit - operator rpc_client_proxy_read_in_t() { + explicit operator rpc_client_proxy_read_in_t() { return {m_path.c_str(), m_offset, m_read_size, hg_bulk_t(m_buffers)}; } @@ -2652,8 +2650,7 @@ struct trunc_data_proxy { : m_path(other.path), m_current_size(other.current_size), m_length(other.length) {} - explicit - operator rpc_client_proxy_trunc_in_t() { + explicit operator rpc_client_proxy_trunc_in_t() { return { m_path.c_str(), m_current_size, @@ -2768,8 +2765,7 @@ struct chunk_stat_proxy { explicit input(const rpc_chunk_stat_in_t& other) : m_dummy(other.dummy) {} - explicit - operator rpc_chunk_stat_in_t() { + explicit operator rpc_chunk_stat_in_t() { return {m_dummy}; } @@ -2908,8 +2904,7 @@ struct create_proxy { explicit input(const rpc_mk_node_in_t& other) : m_path(other.path), m_mode(other.mode) {} - explicit - operator rpc_mk_node_in_t() { + explicit operator rpc_mk_node_in_t() { return {m_path.c_str(), m_mode}; } @@ -3018,8 +3013,7 @@ struct stat_proxy { explicit input(const rpc_path_only_in_t& other) : m_path(other.path) {} - explicit - operator rpc_path_only_in_t() { + explicit operator rpc_path_only_in_t() { return {m_path.c_str()}; } @@ -3138,8 +3132,7 @@ struct remove_proxy { explicit input(const rpc_rm_node_in_t& other) : m_path(other.path) {} - explicit - operator rpc_rm_node_in_t() { + explicit operator rpc_rm_node_in_t() { return {m_path.c_str()}; } @@ -3254,8 +3247,7 @@ struct decr_size_proxy { explicit input(const rpc_trunc_in_t& other) : m_path(other.path), m_length(other.length) {} - explicit - operator rpc_trunc_in_t() { + explicit operator rpc_trunc_in_t() { return {m_path.c_str(), m_length}; } @@ -3364,8 
+3356,7 @@ struct get_metadentry_size_proxy { explicit input(const rpc_path_only_in_t& other) : m_path(other.path) {} - explicit - operator rpc_path_only_in_t() { + explicit operator rpc_path_only_in_t() { return {m_path.c_str()}; } @@ -3500,8 +3491,7 @@ struct update_metadentry_size_proxy { : m_path(other.path), m_size(other.size), m_offset(other.offset), m_append(other.append) {} - explicit - operator rpc_update_metadentry_size_in_t() { + explicit operator rpc_update_metadentry_size_in_t() { return {m_path.c_str(), m_size, m_offset, m_append}; } @@ -3635,8 +3625,7 @@ struct get_dirents_extended_proxy { : m_path(other.path), m_server(other.server), m_buffers(other.bulk_handle) {} - explicit - operator rpc_proxy_get_dirents_in_t() { + explicit operator rpc_proxy_get_dirents_in_t() { return {m_path.c_str(), m_server, hg_bulk_t(m_buffers)}; } diff --git a/include/config.hpp b/include/config.hpp index c49ee873f..a1de84c39 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -136,7 +136,7 @@ constexpr auto fwd_io_count_threshold = 0; namespace rpc { constexpr auto chunksize = 524288; // in bytes (e.g., 524288 == 512KB) // size of preallocated buffer to hold directory entries in rpc call -constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega +constexpr auto dirents_buff_size = (8 * 1024 * 1024); // 8 mega constexpr auto dirents_buff_size_proxy = (128 * 1024 * 1024); // 8 mega /* * Indicates the number of concurrent progress to drive I/O operations of chunk diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 1c3c3390b..5ae152c88 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -993,8 +993,7 @@ hook_lgetxattr(const char* path, const char* name, void* value, size_t size) { if(CTX->relativize_path(path, rel_path)) { return -ENOTSUP; } - return syscall_no_intercept_wrapper(SYS_lgetxattr, path, name, value, - size); + return syscall_no_intercept_wrapper(SYS_lgetxattr, path, name, value, size); } diff --git 
a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index b6a39be34..f2c41d4ce 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -206,7 +206,8 @@ forward_get_dirents_single_proxy(const string& path, int server) { new char[gkfs::config::rpc::dirents_buff_size_proxy]); // We use the full size per server... - const std::size_t per_host_buff_size = gkfs::config::rpc::dirents_buff_size_proxy; + const std::size_t per_host_buff_size = + gkfs::config::rpc::dirents_buff_size_proxy; auto output_ptr = make_unique< vector>>(); diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 6cc36ab88..8691bbab2 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -415,9 +415,11 @@ init_environment() { // init margo for proxy RPC if(!GKFS_DATA->bind_proxy_addr().empty()) { - GKFS_DATA->spdlogger()->debug("{}() Initializing Distributor ... ", __func__); + GKFS_DATA->spdlogger()->debug("{}() Initializing Distributor ... 
", + __func__); try { - auto distributor = std::make_shared(); + auto distributor = + std::make_shared(); RPC_DATA->distributor(distributor); } catch(const std::exception& e) { GKFS_DATA->spdlogger()->error( @@ -438,7 +440,8 @@ init_environment() { e.what()); throw; } - GKFS_DATA->spdlogger()->debug("{}() Proxy RPC server running.", __func__); + GKFS_DATA->spdlogger()->debug("{}() Proxy RPC server running.", + __func__); } // Init Argobots ESs to drive IO -- GitLab From 64915a03f498791bd84010e7898a11c36d360737 Mon Sep 17 00:00:00 2001 From: Marc Vef Date: Thu, 27 Jun 2024 19:03:00 +0200 Subject: [PATCH 24/24] Added Changelog, Readme, Cleanup --- .gitmodules | 2 +- CHANGELOG.md | 7 +++ README.md | 45 ++++++++++++++++++- examples/distributors/guided/generate.py | 4 +- include/client/rpc/forward_data_proxy.hpp | 4 +- include/client/rpc/forward_metadata_proxy.hpp | 4 +- include/common/cmake_configure.hpp.in | 4 +- include/config.hpp | 2 +- include/proxy/env.hpp | 4 +- include/proxy/proxy.hpp | 4 +- include/proxy/proxy_data.hpp | 4 +- include/proxy/rpc/forward_data.hpp | 4 +- include/proxy/rpc/forward_metadata.hpp | 4 +- include/proxy/rpc/rpc_defs.hpp | 4 +- include/proxy/rpc/rpc_util.hpp | 4 +- include/proxy/util.hpp | 4 +- include/version.hpp.in | 4 +- marc/CMakeLists.txt | 24 ---------- marc/main.cpp | 45 ------------------- .../copyright-headers/README.md | 4 +- src/client/rpc/forward_data_proxy.cpp | 5 +-- src/client/rpc/forward_metadata_proxy.cpp | 4 +- src/daemon/daemon.cpp | 12 ----- src/daemon/handler/srv_data.cpp | 3 +- src/proxy/CMakeLists.txt | 15 ------- src/proxy/env.cpp | 4 +- src/proxy/proxy.cpp | 19 +------- src/proxy/proxy_data.cpp | 4 +- src/proxy/rpc/forward_data.cpp | 4 +- src/proxy/rpc/forward_metadata.cpp | 4 +- src/proxy/rpc/srv_data.cpp | 4 +- src/proxy/rpc/srv_metadata.cpp | 4 +- src/proxy/util.cpp | 4 +- tests/integration/conftest.py.in | 4 +- tests/integration/pytest.ini.in | 4 +- tests/integration/pytest.install.ini.in | 4 +- 
tests/unit/test_guided_distributor.cpp | 4 +- 37 files changed, 111 insertions(+), 172 deletions(-) delete mode 100644 marc/CMakeLists.txt delete mode 100644 marc/main.cpp diff --git a/.gitmodules b/.gitmodules index 941213b3d..276395241 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "external/hermes"] path = external/hermes - url = https://github.com/marcvef/hermes.git + url = https://github.com/gekkofs/hermes.git [submodule "tests/scripts/bats"] path = tests/scripts/bats url = https://github.com/bats-core/bats-core.git diff --git a/CHANGELOG.md b/CHANGELOG.md index d8d8198ac..6fb21bebc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### New +- Added the GekkoFS proxy as an optional gateway between client and daemon. The proxy is started on each compute node + that houses clients ([!191](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_request/191)). + - Additional options for the GekkoFS daemon were added to integrate the GekkoFS proxy. + - The proxy introduced a new executable: `gkfs_proxy`. + - The `gkfs` run script has been significantly reworked to accommodate the proxy and a number of additional features, + e.g., CPU socket pinning. + - The environment variable `LIBGKFS_PROXY_PID_FILE` was added for clients when a non-default pid file path is in use. - Added client-side metrics including the periodic export to a file or ZeroMQ sink via the TCP protocol ([!176](https://storage.bsc.es/gitlab/hpc/gekkofs/-/merge_request/176)). - CMake option added to enable this optional feature `-DGKFS_ENABLE_CLIENT_METRICS=ON` diff --git a/README.md b/README.md index fd35d3bfe..e57f23483 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,8 @@ GekkoFS testing support: `python38-devel` (**>Python-3.6 required**) execute the following command from the root of the source directory: `git submodule update --init` 3. 
Set up the necessary environment variables where the compiled direct GekkoFS dependencies will be installed at (we assume the path `/home/foo/gekkofs_deps/install` in the following) - - `export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/home/foo/gekkofs_deps/install/lib:/home/foo/gekkofs_deps/install/lib64` + - + `export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/home/foo/gekkofs_deps/install/lib:/home/foo/gekkofs_deps/install/lib64` 4. Download and compile the direct dependencies, e.g., - Download example: `gekkofs/scripts/dl_dep.sh /home/foo/gekkofs_deps/git` - Compilation example: `gekkofs/scripts/compile_dep.sh /home/foo/gekkofs_deps/git /home/foo/gekkofs_deps/install` @@ -372,6 +373,48 @@ total_bytes: 1802366 total_iops: 4 ``` +### GekkoFS proxy + +The GekkoFS proxy is an additional (alternative) component that runs on each client and acts as gateway between the +client and daemons. It can improve network stability, e.g., for opa-psm2, and provides a basis for future asynchronous +I/O as well as client caching techniques to control file system semantics. + +The `gkfs` script fully supports the GekkoFS proxy and an example can be found in `scripts/run`. When using the proxy +manually additional arguments are required on the daemon side, i.e., which network interface and protocol should be +used: + +```bash + --proxy-listen eno1 --proxy-protocol ofi+sockets +``` + +The proxy is started thereafter: + +```bash +./gkfs_proxy -H ./gkfs_hostfile --pid-path ./vef_gkfs_proxy.pid -p ofi+sockets +``` + +The shared hostfile was generated by the daemons whereas the pid_path is local to the machine and is +detected by clients. The pid-path defaults to `/tmp/gkfs_proxy.pid`. + +Under default operation, clients detect automatically whether to use the proxy. If another proxy path is used, the +environment variable `LIBGKFS_PROXY_PID_FILE` can be set for the clients. 
+Alternatively, the `gkfs` script automatically sets all required arguments: + +```bash +scripts/run/gkfs -c scripts/run/gkfs.conf -f start --proxy +* [gkfs] Starting GekkoFS daemons (1 nodes) ... +* [gkfs] GekkoFS daemons running +* [gkfs] Startup time: 2.013 seconds +* [gkfs] Starting GekkoFS proxies (1 nodes) ... +* [gkfs] GekkoFS proxies running +* [gkfs] Startup time: 5.002 seconds +Press 'q' to exit +``` + +Please consult `include/config.hpp` for additional configuration options. Note that the GekkoFS proxy does not support +replication. + ## Acknowledgment This software was partially supported by the EC H2020 funded NEXTGenIO project (Project ID: 671951, www.nextgenio.eu). diff --git a/examples/distributors/guided/generate.py b/examples/distributors/guided/generate.py index e4e21d963..35a48411e 100644 --- a/examples/distributors/guided/generate.py +++ b/examples/distributors/guided/generate.py @@ -1,6 +1,6 @@ ### -# Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain -# Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany +# Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain +# Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany # This software was partially supported by the # EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/client/rpc/forward_data_proxy.hpp b/include/client/rpc/forward_data_proxy.hpp index 6f8c3f8f0..5bb791273 100644 --- a/include/client/rpc/forward_data_proxy.hpp +++ b/include/client/rpc/forward_data_proxy.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
diff --git a/include/client/rpc/forward_metadata_proxy.hpp b/include/client/rpc/forward_metadata_proxy.hpp index 6f44d10ea..2e87106fa 100644 --- a/include/client/rpc/forward_metadata_proxy.hpp +++ b/include/client/rpc/forward_metadata_proxy.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/common/cmake_configure.hpp.in b/include/common/cmake_configure.hpp.in index 8d2405805..f8fc88dc8 100644 --- a/include/common/cmake_configure.hpp.in +++ b/include/common/cmake_configure.hpp.in @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/config.hpp b/include/config.hpp index a1de84c39..e859b20f3 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -71,7 +71,7 @@ constexpr auto zero_buffer_before_read = false; * regardless of their order minimizing the gap between bulk transfers. 
* Due to spinning in a loop this increases CPU utilization */ -constexpr auto spin_lock_read = true; +constexpr auto spin_lock_read = false; } // namespace io namespace log { diff --git a/include/proxy/env.hpp b/include/proxy/env.hpp index 4e6db83a2..bd17fe5a7 100644 --- a/include/proxy/env.hpp +++ b/include/proxy/env.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/proxy/proxy.hpp b/include/proxy/proxy.hpp index 34bea585b..cd1ec99b0 100644 --- a/include/proxy/proxy.hpp +++ b/include/proxy/proxy.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/proxy/proxy_data.hpp b/include/proxy/proxy_data.hpp index 42d829f08..97a54b7e3 100644 --- a/include/proxy/proxy_data.hpp +++ b/include/proxy/proxy_data.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
diff --git a/include/proxy/rpc/forward_data.hpp b/include/proxy/rpc/forward_data.hpp index 68efc0762..4dd329506 100644 --- a/include/proxy/rpc/forward_data.hpp +++ b/include/proxy/rpc/forward_data.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/proxy/rpc/forward_metadata.hpp b/include/proxy/rpc/forward_metadata.hpp index 651f847ec..c5ff43098 100644 --- a/include/proxy/rpc/forward_metadata.hpp +++ b/include/proxy/rpc/forward_metadata.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/proxy/rpc/rpc_defs.hpp b/include/proxy/rpc/rpc_defs.hpp index da3d652ce..306d15b87 100644 --- a/include/proxy/rpc/rpc_defs.hpp +++ b/include/proxy/rpc/rpc_defs.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
diff --git a/include/proxy/rpc/rpc_util.hpp b/include/proxy/rpc/rpc_util.hpp index 23cd37dfa..8dc626e18 100644 --- a/include/proxy/rpc/rpc_util.hpp +++ b/include/proxy/rpc/rpc_util.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/proxy/util.hpp b/include/proxy/util.hpp index 02d1e8a39..96141adef 100644 --- a/include/proxy/util.hpp +++ b/include/proxy/util.hpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/include/version.hpp.in b/include/version.hpp.in index 6d5172540..975310730 100644 --- a/include/version.hpp.in +++ b/include/version.hpp.in @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
diff --git a/marc/CMakeLists.txt b/marc/CMakeLists.txt deleted file mode 100644 index 1674ac495..000000000 --- a/marc/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -set(MARC_SRC - main.cpp -) -set(MARC_HEADERS -) - -add_executable(marc_test ${MARC_SRC} ${MARC_HEADERS}) - -target_link_libraries(marc_test - PUBLIC - # gkfs_intercept - # internal libs - # spdlog - # fmt::fmt - # others - # Threads::Threads - PRIVATE - # open issue for std::filesystem https://gitlab.kitware.com/cmake/cmake/-/issues/17834 - stdc++fs -) - -target_include_directories(marc_test - PRIVATE -) diff --git a/marc/main.cpp b/marc/main.cpp deleted file mode 100644 index 2da8f0c29..000000000 --- a/marc/main.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include -#include -#include -#include - -using namespace std; - -using ns = chrono::nanoseconds; -using get_time = chrono::steady_clock; - -int -main(int argc, char* argv[]) { - - auto filen = atoi(argv[1]); - - // cout << mkdir("/tmp/mountdir/bla", 0775) << endl; - // auto buf = "BUFFERINO2"; - // struct stat attr; - // cout << creat("/tmp/mountdir/creat.txt", 0666) << endl; - // cout << - // creat("/tmp/mountdir/#test-dir.0/mdtest_tree.0/file.mdtest.0000000.0000000005", - // 0666) << endl; cout << stat("/tmp/mountdir/creat.txt", &attr) << endl; - // cout << unlink("/tmp/mountdir/creat.txt") << endl; - - - auto start_t = get_time::now(); - int fd; - for(int i = 0; i < filen; ++i) { - string p = "/dev/shm/vef_gkfs_mountdir/file" + to_string(i); - fd = creat(p.c_str(), 0666); - if(i % 25000 == 0) - cout << i << " files processed." << endl; - close(fd); - } - - auto end_t = get_time::now(); - auto diff = end_t - start_t; - - auto diff_count = chrono::duration_cast(diff).count(); - - cout << diff_count << "ns\t" << (diff_count) / 1000000. << "ms" << endl; - cout << filen / ((diff_count) / 1000000000.) 
<< " files per second" << endl; - - return 0; -} \ No newline at end of file diff --git a/scripts/maintainer-tools/copyright-headers/README.md b/scripts/maintainer-tools/copyright-headers/README.md index b4f190490..9abbfa479 100644 --- a/scripts/maintainer-tools/copyright-headers/README.md +++ b/scripts/maintainer-tools/copyright-headers/README.md @@ -41,8 +41,8 @@ updated fields should show in a diff view: /* - Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany - + Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - + Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/client/rpc/forward_data_proxy.cpp b/src/client/rpc/forward_data_proxy.cpp index 00d1251b5..a25406297 100644 --- a/src/client/rpc/forward_data_proxy.cpp +++ b/src/client/rpc/forward_data_proxy.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). @@ -40,7 +40,6 @@ forward_write_proxy(const string& path, const void* buf, off64_t offset, size_t write_size) { LOG(DEBUG, "Using write proxy for path '{}' offset '{}' size '{}' ...", path, offset, write_size); - // TODO mostly copy pasta from forward_data. 
assert(write_size > 0); // Calculate chunkid boundaries and numbers so that daemons know in diff --git a/src/client/rpc/forward_metadata_proxy.cpp b/src/client/rpc/forward_metadata_proxy.cpp index f2c41d4ce..0e43b352d 100644 --- a/src/client/rpc/forward_metadata_proxy.cpp +++ b/src/client/rpc/forward_metadata_proxy.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/daemon/daemon.cpp b/src/daemon/daemon.cpp index 8691bbab2..073fb60a1 100644 --- a/src/daemon/daemon.cpp +++ b/src/daemon/daemon.cpp @@ -237,10 +237,6 @@ init_rpc_server() { void register_proxy_server_rpcs(margo_instance_id mid) { - // MARGO_REGISTER(mid, gkfs::rpc::tag::write, rpc_write_data_in_t, - // rpc_data_out_t, rpc_srv_write); - // MARGO_REGISTER(mid, gkfs::rpc::tag::read, rpc_read_data_in_t, - // rpc_data_out_t, rpc_srv_read); MARGO_REGISTER(mid, gkfs::rpc::tag::get_chunk_stat, rpc_chunk_stat_in_t, rpc_chunk_stat_out_t, rpc_srv_get_chunk_stat); MARGO_REGISTER(mid, gkfs::rpc::tag::create, rpc_mk_node_in_t, rpc_err_out_t, @@ -294,14 +290,6 @@ init_proxy_rpc_server() { args.hg_init_info = &hg_options; auto* mid = margo_init_ext(GKFS_DATA->bind_proxy_addr().c_str(), MARGO_SERVER_MODE, &args); - // hg_options.na_class = nullptr; - // if(gkfs::rpc::protocol::ofi_psm2 == GKFS_DATA->proxy_rpc_protocol()) - // hg_options.na_init_info.progress_mode = NA_NO_BLOCK; - // // Start Margo (this will also initialize Argobots and Mercury - // internally) auto mid = - // margo_init_opt(GKFS_DATA->bind_proxy_addr().c_str(), - // MARGO_SERVER_MODE, &hg_options, HG_TRUE, - // gkfs::config::rpc::proxy_handler_xstreams); if(mid == 
MARGO_INSTANCE_NULL) { throw runtime_error("Failed to initialize the Margo proxy RPC server"); } diff --git a/src/daemon/handler/srv_data.cpp b/src/daemon/handler/srv_data.cpp index 18c240b07..daf41fe4a 100644 --- a/src/daemon/handler/srv_data.cpp +++ b/src/daemon/handler/srv_data.cpp @@ -54,7 +54,6 @@ #endif using namespace std; - namespace { /** @@ -673,6 +672,7 @@ rpc_srv_proxy_write(hg_handle_t handle) { /* * 1. Setup */ + // TODO Proxy write does not support the chunk bitmap yet rpc_proxy_daemon_write_in_t in{}; rpc_data_out_t out{}; hg_bulk_t bulk_handle = nullptr; @@ -975,6 +975,7 @@ rpc_srv_proxy_read(hg_handle_t handle) { /* * 1. Setup */ + // TODO Proxy read does not support the chunk bitmap yet rpc_proxy_daemon_read_in_t in{}; rpc_data_out_t out{}; hg_bulk_t bulk_handle = nullptr; diff --git a/src/proxy/CMakeLists.txt b/src/proxy/CMakeLists.txt index 774a4406a..e9e9c3869 100644 --- a/src/proxy/CMakeLists.txt +++ b/src/proxy/CMakeLists.txt @@ -61,19 +61,4 @@ target_link_libraries( Threads::Threads ) -#set(PROXY_HEADERS -# ../../include/config.hpp -# ../../include/common/common_defs.hpp -# ../../include/common/rpc/rpc_types.hpp -# ../../include/common/rpc/rpc_util.hpp -# ../../include/proxy/env.hpp -# ../../include/proxy/proxy.hpp -# ../../include/proxy/proxy_data.hpp -# ../../include/proxy/util.hpp -# ../../include/proxy/rpc/forward_data.hpp -# ../../include/proxy/rpc/forward_metadata.hpp -# ../../include/proxy/rpc/rpc_defs.hpp -# ../../include/proxy/rpc/rpc_util.hpp -# ) - install(TARGETS gkfs_proxy RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/src/proxy/env.cpp b/src/proxy/env.cpp index 95f9bd408..9bb88de5c 100644 --- a/src/proxy/env.cpp +++ b/src/proxy/env.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet 
Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/proxy/proxy.cpp b/src/proxy/proxy.cpp index 77e17ea12..89ad33cc4 100644 --- a/src/proxy/proxy.cpp +++ b/src/proxy/proxy.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). @@ -89,13 +89,6 @@ init_ipc_server() { args.hg_init_info = &hg_options; auto* mid = margo_init_ext(gkfs::rpc::protocol::na_sm, MARGO_SERVER_MODE, &args); - - // hg_options.na_class = nullptr; - // // Start Margo (this will also initialize Argobots and Mercury - // internally) auto mid = margo_init_opt(gkfs::rpc::protocol::na_sm, - // MARGO_SERVER_MODE, - // &hg_options, HG_TRUE, - // gkfs::config::rpc::proxy_handler_xstreams); if(mid == MARGO_INSTANCE_NULL) { throw runtime_error("Failed to initialize the Margo IPC server"); } @@ -183,14 +176,6 @@ init_rpc_client(const string& protocol) { args.json_config = margo_config.c_str(); args.hg_init_info = &hg_options; auto* mid = margo_init_ext(protocol.c_str(), MARGO_CLIENT_MODE, &args); - - // hg_options.na_class = nullptr; - // if(gkfs::rpc::protocol::ofi_psm2 == protocol.c_str()) - // hg_options.na_init_info.progress_mode = NA_NO_BLOCK; - // // Start Margo (this will also initialize Argobots and Mercury - // internally) auto mid = margo_init_opt(protocol.c_str(), - // MARGO_CLIENT_MODE, &hg_options, - // HG_TRUE, 0); if(mid == MARGO_INSTANCE_NULL) { throw runtime_error("Failed to initialize the Margo RPC client"); } diff --git a/src/proxy/proxy_data.cpp b/src/proxy/proxy_data.cpp index 5d93f5297..6fb1026b5 100644 --- a/src/proxy/proxy_data.cpp 
+++ b/src/proxy/proxy_data.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/proxy/rpc/forward_data.cpp b/src/proxy/rpc/forward_data.cpp index d24470e87..49f9db0ec 100644 --- a/src/proxy/rpc/forward_data.cpp +++ b/src/proxy/rpc/forward_data.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/proxy/rpc/forward_metadata.cpp b/src/proxy/rpc/forward_metadata.cpp index 2a42d47a6..830b7db51 100644 --- a/src/proxy/rpc/forward_metadata.cpp +++ b/src/proxy/rpc/forward_metadata.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
diff --git a/src/proxy/rpc/srv_data.cpp b/src/proxy/rpc/srv_data.cpp index 9704825ce..472692ee9 100644 --- a/src/proxy/rpc/srv_data.cpp +++ b/src/proxy/rpc/srv_data.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/proxy/rpc/srv_metadata.cpp b/src/proxy/rpc/srv_metadata.cpp index 5f577f0b7..df07724d1 100644 --- a/src/proxy/rpc/srv_metadata.cpp +++ b/src/proxy/rpc/srv_metadata.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). diff --git a/src/proxy/util.cpp b/src/proxy/util.cpp index 2a85376bf..c3f5c6045 100644 --- a/src/proxy/util.cpp +++ b/src/proxy/util.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
diff --git a/tests/integration/conftest.py.in b/tests/integration/conftest.py.in index 8eaf4cafc..54ab13af1 100644 --- a/tests/integration/conftest.py.in +++ b/tests/integration/conftest.py.in @@ -1,6 +1,6 @@ ################################################################################ -# Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain # -# Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany # +# Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany # # # # This software was partially supported by the # # EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # diff --git a/tests/integration/pytest.ini.in b/tests/integration/pytest.ini.in index e86bcb476..d90dab3cc 100644 --- a/tests/integration/pytest.ini.in +++ b/tests/integration/pytest.ini.in @@ -1,6 +1,6 @@ ################################################################################ -# Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain # -# Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany # +# Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany # # # # This software was partially supported by the # # EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
# diff --git a/tests/integration/pytest.install.ini.in b/tests/integration/pytest.install.ini.in index 7886d3371..a578853ae 100644 --- a/tests/integration/pytest.install.ini.in +++ b/tests/integration/pytest.install.ini.in @@ -1,6 +1,6 @@ ################################################################################ -# Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain # -# Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany # +# Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain # +# Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany # # # # This software was partially supported by the # # EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). # diff --git a/tests/unit/test_guided_distributor.cpp b/tests/unit/test_guided_distributor.cpp index 36b6f5910..6a7057b3a 100644 --- a/tests/unit/test_guided_distributor.cpp +++ b/tests/unit/test_guided_distributor.cpp @@ -1,6 +1,6 @@ /* - Copyright 2018-2020, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2020, Johannes Gutenberg Universitaet Mainz, Germany + Copyright 2018-2024, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2024, Johannes Gutenberg Universitaet Mainz, Germany This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). -- GitLab