From 2f923cdf7bb4b458d8590272142d2ce78d84dc91 Mon Sep 17 00:00:00 2001 From: Tommaso Tocci Date: Fri, 9 Aug 2019 12:48:50 +0200 Subject: [PATCH 01/71] bugfix: preload library fail with bash executable --- src/global/env_util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global/env_util.cpp b/src/global/env_util.cpp index efcd38e44..8cbf50811 100644 --- a/src/global/env_util.cpp +++ b/src/global/env_util.cpp @@ -22,7 +22,7 @@ namespace gkfs { using namespace std; string get_env(const string& env_name) { - char* env_value = getenv(env_name.c_str()); + char* env_value = secure_getenv(env_name.c_str()); if (env_value == nullptr) { throw runtime_error("Environment variable not set: " + env_name); } -- GitLab From 1674b427c200786242ecb2fa31467975e32ea067 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 4 Sep 2019 09:18:24 +0200 Subject: [PATCH 02/71] Add hermes dependency to build system --- .gitmodules | 3 +++ CMakeLists.txt | 7 +++++++ external/hermes | 1 + src/client/CMakeLists.txt | 1 + 4 files changed, 12 insertions(+) create mode 100644 .gitmodules create mode 160000 external/hermes diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..0a6a93c55 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/hermes"] + path = external/hermes + url = https://github.com/bsc-ssrg/hermes.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 04901c4cb..eee0c3e3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,6 +153,13 @@ set_target_properties(fmt INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/external/spdlog" ) +add_library(hermes INTERFACE) +# we cannot use target_include_directories with CMake < 3.11 +set_target_properties(hermes + PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/external/hermes/include" +) + set(INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include") # define include directories that are relevant for all targets diff --git a/external/hermes b/external/hermes new file 
mode 160000 index 000000000..9779ad922 --- /dev/null +++ b/external/hermes @@ -0,0 +1 @@ +Subproject commit 9779ad922b3e1a0e657516294721a5386f52d89d diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 62a1bd1ac..3c801a6e5 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -49,6 +49,7 @@ target_link_libraries(gkfs_intercept dl ${ABT_LIBRARIES} mercury + hermes ${MARGO_LIBRARIES} Boost::boost # needed for tokenizer header Threads::Threads -- GitLab From 3b730a3b9110623019086149c58604d340c66f5a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 4 Sep 2019 09:32:57 +0200 Subject: [PATCH 03/71] Fix syntax error in GCC 4.9 --- include/client/open_file_map.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/client/open_file_map.hpp b/include/client/open_file_map.hpp index b7c4b1265..aeb188833 100644 --- a/include/client/open_file_map.hpp +++ b/include/client/open_file_map.hpp @@ -44,7 +44,7 @@ class OpenFile { protected: FileType type_; std::string path_; - std::array(OpenFile_flags::flag_count)> flags_ = {false}; + std::array(OpenFile_flags::flag_count)> flags_ = {{false}}; unsigned long pos_; std::mutex pos_mutex_; std::mutex flag_mutex_; -- GitLab From dfe696024d64a0b4fca2fbde3da83aea74ca199a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 4 Sep 2019 14:34:38 +0200 Subject: [PATCH 04/71] Update Hermes to HEAD --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 9779ad922..c71c6e435 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 9779ad922b3e1a0e657516294721a5386f52d89d +Subproject commit c71c6e435dd0f4d39f8b69bf0ed16e88e857d95f -- GitLab From cda9e83dae463acb58888356177a6ede19a07d0c Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 10:50:20 +0200 Subject: [PATCH 05/71] Update hermes submodule --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/external/hermes b/external/hermes index c71c6e435..8bcdee34c 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit c71c6e435dd0f4d39f8b69bf0ed16e88e857d95f +Subproject commit 8bcdee34c2702a46243cfc3e527d2b34acb31608 -- GitLab From 48d56a8bccac43ea33861fdee64ed948f722647a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 10:54:33 +0200 Subject: [PATCH 06/71] fs_config rpc now uses Hermes instead of Margo --- include/client/preload_context.hpp | 9 ++ include/client/preload_util.hpp | 5 + include/client/rpc/hg_rpcs.hpp | 220 +++++++++++++++++++++++++++ src/client/CMakeLists.txt | 2 + src/client/preload.cpp | 52 +++++++ src/client/preload_context.cpp | 11 ++ src/client/preload_util.cpp | 75 ++++++++- src/client/rpc/hg_rpcs.cpp | 27 ++++ src/client/rpc/ld_rpc_management.cpp | 65 +++----- 9 files changed, 423 insertions(+), 43 deletions(-) create mode 100644 include/client/rpc/hg_rpcs.hpp create mode 100644 src/client/rpc/hg_rpcs.cpp diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index cb42c9227..96b0d8ef7 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -14,6 +14,7 @@ #ifndef IFS_PRELOAD_CTX_HPP #define IFS_PRELOAD_CTX_HPP +#include #include #include #include @@ -61,7 +62,10 @@ class PreloadContext { std::vector mountdir_components_; std::string mountdir_; +#if 1 // TODO(amiranda): remove std::vector hosts_; +#endif + std::vector hosts2_; uint64_t local_host_id_; bool interception_enabled_; @@ -85,8 +89,13 @@ class PreloadContext { void cwd(const std::string& path); const std::string& cwd() const; +#if 1 // TODO(amiranda) remove const std::vector& hosts() const; void hosts(const std::vector& addrs); +#endif + + const std::vector& hosts2() const; + void hosts2(const std::vector& addrs); uint64_t local_host_id() const; void local_host_id(uint64_t id); diff --git a/include/client/preload_util.hpp 
b/include/client/preload_util.hpp index 99fde438e..5bfbae74b 100644 --- a/include/client/preload_util.hpp +++ b/include/client/preload_util.hpp @@ -41,6 +41,11 @@ struct MetadentryUpdateFlags { // Margo instances extern margo_instance_id ld_margo_rpc_id; + +// Hermes instance +namespace hermes { class async_engine; } +extern std::unique_ptr ld_network_service; + // RPC IDs extern hg_id_t rpc_config_id; extern hg_id_t rpc_mk_node_id; diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp new file mode 100644 index 000000000..9cedc30c7 --- /dev/null +++ b/include/client/rpc/hg_rpcs.hpp @@ -0,0 +1,220 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_RPCS_HPP +#define GKFS_RPCS_HPP + +// C includes +#include +#include +#include + +// C++ includes +#include + +// hermes includes +#include + +#ifndef HG_GEN_PROC_NAME +#define HG_GEN_PROC_NAME(struct_type_name) \ + hg_proc_ ## struct_type_name +#endif + + +#include +#include + +namespace hermes { namespace detail { + +struct hg_void_t { }; + +static HG_INLINE hg_return_t +hg_proc_void_t(hg_proc_t proc, void *data) +{ + (void) proc; + (void) data; + + return HG_SUCCESS; +} + +}} // namespace hermes::detail + +//============================================================================== +// definitions for fs_config +struct fs_config { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = fs_config; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = hermes::detail::hg_void_t; + using mercury_output_type = rpc_config_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 3033006080; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::fs_config; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + hermes::detail::hg_proc_void_t; + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_config_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input() { } + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + explicit + input(const hermes::detail::hg_void_t& other) { } + + explicit + operator hermes::detail::hg_void_t() { + return {}; + } + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_mountdir(), + m_rootdir(), + m_atime_state(), + m_mtime_state(), + m_ctime_state(), + m_link_cnt_state(), + m_blocks_state(), + m_uid(), + m_gid() {} + + output(const std::string& mountdir, + const std::string& rootdir, + bool atime_state, + bool mtime_state, + bool ctime_state, + bool link_cnt_state, + bool blocks_state, + uint32_t uid, + uint32_t gid) : + m_mountdir(mountdir), + m_rootdir(rootdir), + m_atime_state(atime_state), + m_mtime_state(mtime_state), + m_ctime_state(ctime_state), + m_link_cnt_state(link_cnt_state), + m_blocks_state(blocks_state), + m_uid(uid), + m_gid(gid) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_config_out_t& out) { + m_mountdir = out.mountdir; + m_rootdir = out.rootdir; + m_atime_state = out.atime_state; + m_mtime_state = out.mtime_state; + m_ctime_state = out.ctime_state; + m_link_cnt_state = out.link_cnt_state; + m_blocks_state = out.blocks_state; + m_uid = out.uid; + m_gid = 
out.gid; + } + + std::string + mountdir() const { + return m_mountdir; + } + + std::string + rootdir() const { + return m_rootdir; + } + + bool + atime_state() const { + return m_atime_state; + } + + bool + mtime_state() const { + return m_mtime_state; + } + + bool + ctime_state() const { + return m_ctime_state; + } + + bool + link_cnt_state() const { + return m_link_cnt_state; + } + + bool + blocks_state() const { + return m_blocks_state; + } + + uint32_t + uid() const { + return m_uid; + } + + uint32_t + gid() const { + return m_gid; + } + + private: + std::string m_mountdir; + std::string m_rootdir; + bool m_atime_state; + bool m_mtime_state; + bool m_ctime_state; + bool m_link_cnt_state; + bool m_blocks_state; + uint32_t m_uid; + uint32_t m_gid; + }; +}; +#endif // GKFS_RPCS_HPP diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 3c801a6e5..4807923dc 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -8,6 +8,7 @@ set(PRELOAD_SRC preload.cpp resolve.cpp preload_util.cpp + rpc/hg_rpcs.cpp rpc/ld_rpc_management.cpp rpc/ld_rpc_data_ws.cpp rpc/ld_rpc_metadentry.cpp @@ -31,6 +32,7 @@ set(PRELOAD_HEADERS ../../include/client/preload.hpp ../../include/client/resolve.hpp ../../include/client/preload_util.hpp + ../../include/client/rpc/hg_rpcs.hpp ../../include/client/rpc/ld_rpc_management.hpp ../../include/client/rpc/ld_rpc_data_ws.hpp ../../include/client/rpc/ld_rpc_metadentry.hpp diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 6b63dd95f..405050d73 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -23,9 +23,12 @@ #include #include #include +#include +#include #include + using namespace std; // // thread to initialize the whole margo shazaam only once per process @@ -49,6 +52,7 @@ hg_id_t rpc_chunk_stat_id; // Margo instances margo_instance_id ld_margo_rpc_id; +std::unique_ptr ld_network_service; static inline void exit_error_msg(int errcode, const string& msg) { CTX->log()->error(msg); @@ 
-117,6 +121,22 @@ void register_client_rpcs(margo_instance_id mid) { rpc_chunk_stat_in_t, rpc_chunk_stat_out_t, NULL); + + fmt::print(stdout, "rpc_config_id: {}\n", rpc_config_id); + fmt::print(stdout, "rpc_mk_node_id: {}\n", rpc_mk_node_id); + fmt::print(stdout, "rpc_stat_id: {}\n", rpc_stat_id); + fmt::print(stdout, "rpc_rm_node_id: {}\n", rpc_rm_node_id); + fmt::print(stdout, "rpc_decr_size_id: {}\n", rpc_decr_size_id); + fmt::print(stdout, "rpc_update_metadentry_id: {}\n", rpc_update_metadentry_id); + fmt::print(stdout, "rpc_get_metadentry_size_id: {}\n", rpc_get_metadentry_size_id); + fmt::print(stdout, "rpc_update_metadentry_size_id: {}\n", rpc_update_metadentry_size_id); + fmt::print(stdout, "rpc_mk_symlink_id: {}\n", rpc_mk_symlink_id); + fmt::print(stdout, "rpc_write_data_id: {}\n", rpc_write_data_id); + fmt::print(stdout, "rpc_read_data_id: {}\n", rpc_read_data_id); + fmt::print(stdout, "rpc_trunc_data_id: {}\n", rpc_trunc_data_id); + fmt::print(stdout, "rpc_get_dirents_id: {}\n", rpc_get_dirents_id); + fmt::print(stdout, "rpc_chunk_stat_id: {}\n", rpc_chunk_stat_id); + } /** @@ -149,6 +169,33 @@ bool init_margo_client(const std::string& na_plugin) { return true; } + + + +/** + * Initializes the Hermes client for a given transport prefix + * @param transport_prefix + * @return true if succesfully initialized; false otherwise + */ +bool init_hermes_client(const std::string& transport_prefix) { + + try { + ld_network_service = + std::make_unique( + hermes::get_transport_type(transport_prefix)); + ld_network_service->run(); + } catch (const std::exception& ex) { + fmt::print(stderr, "Failed to initialize Hermes RPC client {}\n", + ex.what()); + return false; + } + + rpc_config_id = fs_config::public_id; + + return true; +} + + /** * This function is only called in the preload constructor and initializes Argobots and Margo clients */ @@ -159,6 +206,11 @@ void init_ld_environment_() { exit_error_msg(EXIT_FAILURE, "Unable to initializa Margo RPC client"); } + 
// initialize Hermes interface to Mercury + if (!init_hermes_client(RPC_PROTOCOL)) { + exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client"); + } + try { load_hosts(); } catch (const std::exception& e) { diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 79e7ac7a0..530f665f7 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -11,6 +11,7 @@ SPDX-License-Identifier: MIT */ +#include #include #include @@ -57,6 +58,7 @@ const std::string& PreloadContext::cwd() const { return cwd_; } +#if 1 // TODO(amiranda) remove const std::vector& PreloadContext::hosts() const { return hosts_; } @@ -64,6 +66,15 @@ const std::vector& PreloadContext::hosts() const { void PreloadContext::hosts(const std::vector& addrs) { hosts_ = addrs; } +#endif + +const std::vector& PreloadContext::hosts2() const { + return hosts2_; +} + +void PreloadContext::hosts2(const std::vector& endpoints) { + hosts2_ = endpoints; +} uint64_t PreloadContext::local_host_id() const { return local_host_id_; diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 02d7c5151..5a5b303ca 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,35 @@ vector> load_hosts_file(const std::string& lfpath) { return hosts; } +hermes::endpoint lookup_endpoint(const std::string& uri, + std::size_t max_retries = 3) { + + CTX->log()->debug("{}() Looking up address '{}'", __func__, uri); + + std::random_device rd; // obtain a random number from hardware + std::size_t attempts = 0; + std::string error_msg; + + do { + try { + return ld_network_service->lookup(uri); + } catch (const exception& ex) { + error_msg = ex.what(); + CTX->log()->warn("{}() Failed to lookup address '{}'. 
Attempts [{}/{}]", + __func__, uri, attempts + 1, max_retries); + // Wait a random amount of time and try again + std::mt19937 g(rd()); // seed the random generator + std::uniform_int_distribution<> distr(50, 50 * (attempts + 2)); // define the range + std::this_thread::sleep_for(std::chrono::milliseconds(distr(g))); + continue; + } + } while (++attempts < max_retries); + + throw std::runtime_error( + fmt::format("Endpoint for address '{}' could not be found ({})", + uri, error_msg)); +} + hg_addr_t margo_addr_lookup_retry(const std::string& uri) { CTX->log()->debug("{}() Lookink up address '{}'", __func__, uri); // try to look up 3 times before erroring out @@ -152,7 +182,10 @@ void load_hosts() { auto local_hostname = get_my_hostname(true); bool local_host_found = false; - vector addrs(hosts.size()); + vector addrs(hosts.size()); // TODO(amiranda) remove + + std::vector addrs2; + addrs2.reserve(hosts.size()); vector host_ids(hosts.size()); // populate vector with [0, ..., host_size - 1] @@ -171,8 +204,14 @@ void load_hosts() { for (const auto& id: host_ids) { const auto& hostname = hosts.at(id).first; const auto& uri = hosts.at(id).second; - auto addr = margo_addr_lookup_retry(uri); - addrs.at(id) = addr; + + auto endp = ::lookup_endpoint(uri); + + auto it = std::next(addrs2.begin(), id); + addrs2.emplace(it, endp); + + auto addr = margo_addr_lookup_retry(uri); // TODO(amiranda) remove + addrs.at(id) = addr; // TODO(amiranda) remove if (!local_host_found && hostname == local_hostname) { CTX->log()->debug("{}() Found local host: {}", __func__, hostname); @@ -181,12 +220,42 @@ void load_hosts() { } } +#if 0 + fmt::print(stdout, " YYY hi!\n"); + + std::for_each( + addrs.begin(), + addrs.end(), + [](hg_addr_t addr) { + hg_class_t* hg_class = margo_get_class(ld_margo_rpc_id); + hg_size_t bsize = 0; + hg_return ret = HG_Addr_to_string(hg_class, NULL, &bsize, addr); + + const auto buffer = std::make_unique(bsize); + HG_Addr_to_string(hg_class, buffer.get(), &bsize, 
addr); + fmt::print(stdout, " XXX {}\n", std::string(buffer.get())); + } + ); + + std::for_each( + addrs2.begin(), + addrs2.end(), + [](const hermes::endpoint& endp) { + fmt::print(stdout, " ZZZ {}\n", endp.to_string()); + } + ); +#endif + if (!local_host_found) { CTX->log()->warn("{}() Failed to find local host." "Fallback: use host id '0' as local host", __func__); CTX->local_host_id(0); } + +#if 1 // TODO(amiranda) remove CTX->hosts(addrs); +#endif + CTX->hosts2(addrs2); } void cleanup_addresses() { diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp new file mode 100644 index 000000000..04d03fe8f --- /dev/null +++ b/src/client/rpc/hg_rpcs.cpp @@ -0,0 +1,27 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#include +#include + +namespace hermes { namespace detail { + +//============================================================================== +// register request types so that they can be used by users and the engine +// +void +register_user_request_types() { + (void) registered_requests().add(); +} + +}} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_management.cpp b/src/client/rpc/ld_rpc_management.cpp index 08fa1031e..4789f387f 100644 --- a/src/client/rpc/ld_rpc_management.cpp +++ b/src/client/rpc/ld_rpc_management.cpp @@ -17,6 +17,8 @@ #include // see https://github.com/boostorg/tokenizer/issues/9 #include #include +#include +#include namespace rpc_send { @@ -27,56 +29,39 @@ namespace rpc_send { * @return */ bool get_fs_config() { - hg_handle_t handle; - rpc_config_out_t out{}; - // fill in - auto ret = margo_create(ld_margo_rpc_id, CTX->hosts().at(CTX->local_host_id()), rpc_config_id, &handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() creating handle for failed", __func__); - return false; - } - CTX->log()->debug("{}() Forwarding request", __func__); - for (int i = 0; i < RPC_TRIES; ++i) { - ret = margo_forward_timed(handle, nullptr, RPC_TIMEOUT); - if (ret == HG_SUCCESS) { - break; - } - CTX->log()->warn("{}() Failed to forward request. Error: {}. Attempt {}/{}", __func__, HG_Error_to_string(ret), i+1, RPC_TRIES); - } - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to forward request. 
Giving up after {} attempts", __func__, RPC_TRIES); - margo_destroy(handle); - return false; - } - /* decode response */ - CTX->log()->debug("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { + auto endp = CTX->hosts2().at(CTX->local_host_id()); + fs_config::output out; + + try { + CTX->log()->debug("{}() Retrieving file system configurations from daemon", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can retry + // for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + out = ld_network_service->post(endp).get().at(0); + } catch (const std::exception& ex) { CTX->log()->error("{}() Retrieving fs configurations from daemon", __func__); - margo_destroy(handle); return false; } - CTX->mountdir(out.mountdir); + CTX->mountdir(out.mountdir()); CTX->log()->info("Mountdir: '{}'", CTX->mountdir()); - CTX->fs_conf()->rootdir = out.rootdir; - CTX->fs_conf()->atime_state = out.atime_state; - CTX->fs_conf()->mtime_state = out.mtime_state; - CTX->fs_conf()->ctime_state = out.ctime_state; - CTX->fs_conf()->link_cnt_state = out.link_cnt_state; - CTX->fs_conf()->blocks_state = out.blocks_state; - CTX->fs_conf()->uid = out.uid; - CTX->fs_conf()->gid = out.gid; + CTX->fs_conf()->rootdir = out.rootdir(); + CTX->fs_conf()->atime_state = out.atime_state(); + CTX->fs_conf()->mtime_state = out.mtime_state(); + CTX->fs_conf()->ctime_state = out.ctime_state(); + CTX->fs_conf()->link_cnt_state = out.link_cnt_state(); + CTX->fs_conf()->blocks_state = out.blocks_state(); + CTX->fs_conf()->uid = out.uid(); + CTX->fs_conf()->gid = out.gid(); - CTX->log()->debug("{}() Got response with mountdir {}", __func__, out.mountdir); + CTX->log()->debug("{}() Got response with mountdir {}", __func__, out.mountdir()); - /* clean 
up resources consumed by this rpc */ - margo_free_output(handle, &out); - margo_destroy(handle); return true; } -} \ No newline at end of file +} -- GitLab From cc157419311145c05d7c023bfecebc7fcf59b820 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 12:19:10 +0200 Subject: [PATCH 07/71] create rpc now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 112 +++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 52 +++++-------- 4 files changed, 134 insertions(+), 32 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 9cedc30c7..c9759d7a9 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -217,4 +217,116 @@ struct fs_config { uint32_t m_gid; }; }; + + +//============================================================================== +// definitions for create +struct create { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = create; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_mk_node_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 796590080; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::create; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_mk_node_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint32_t mode) : + m_path(path), + m_mode(mode) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint32_t + mode() const { + return m_mode; + } + + explicit + input(const rpc_mk_node_in_t& other) : + m_path(other.path), + m_mode(other.mode) { } + + explicit + operator rpc_mk_node_in_t() { + return {m_path.c_str(), m_mode}; + } + + private: + std::string m_path; + uint32_t m_mode; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + + #endif // GKFS_RPCS_HPP diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 405050d73..61fd5ac85 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -191,6 +191,7 @@ bool init_hermes_client(const std::string& transport_prefix) { } rpc_config_id = fs_config::public_id; + rpc_mk_node_id = create::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp 
b/src/client/rpc/hg_rpcs.cpp index 04d03fe8f..52e31ac9b 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -22,6 +22,7 @@ namespace hermes { namespace detail { void register_user_request_types() { (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 59f85b408..9f37380f9 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace rpc_send { @@ -30,42 +31,29 @@ margo_forward_timed_wrap(const hg_handle_t& handle, void* in_struct) { } int mk_node(const std::string& path, const mode_t mode) { - hg_handle_t handle; - rpc_mk_node_in_t in{}; - rpc_err_out_t out{}; + int err = EUNKNOWN; - // fill in - in.path = path.c_str(); - in.mode = mode; - // Create handle - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_mk_node_id, path, handle); - if (ret != HG_SUCCESS) { + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post(endp, path, mode).get().at(0); + err = out.err(); + CTX->log()->debug("{}() Got response success: {}", __func__, err); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; return -1; } - // Send rpc - CTX->log()->debug("{}() About to send RPC ...", __func__); - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - } else { - // something is wrong - errno = EBUSY; - CTX->log()->error("{}() while getting rpc output", __func__); - } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out", __func__); - errno = EBUSY; - } - margo_destroy(handle); + return err; } -- GitLab From 9d8bdb918c673bb4872c22537f65af4126b8b4a3 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 12:30:26 +0200 Subject: [PATCH 08/71] RPC definitions are now in gkfs::rpc (client) --- include/client/rpc/hg_rpcs.hpp | 6 ++++++ src/client/preload.cpp | 4 ++-- src/client/rpc/hg_rpcs.cpp | 4 ++-- src/client/rpc/ld_rpc_management.cpp | 4 ++-- src/client/rpc/ld_rpc_metadentry.cpp | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index c9759d7a9..24d0721dc 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -49,6 +49,9 @@ hg_proc_void_t(hg_proc_t proc, void *data) }} // namespace hermes::detail +namespace gkfs { +namespace rpc { + //============================================================================== // definitions for fs_config struct fs_config { @@ -328,5 +331,8 @@ struct create { }; }; +} // 
namespace rpc +} // namespace gkfs + #endif // GKFS_RPCS_HPP diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 61fd5ac85..4489018a2 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -190,8 +190,8 @@ bool init_hermes_client(const std::string& transport_prefix) { return false; } - rpc_config_id = fs_config::public_id; - rpc_mk_node_id = create::public_id; + rpc_config_id = gkfs::rpc::fs_config::public_id; + rpc_mk_node_id = gkfs::rpc::create::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 52e31ac9b..5452de31b 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -21,8 +21,8 @@ namespace hermes { namespace detail { // void register_user_request_types() { - (void) registered_requests().add(); - (void) registered_requests().add(); + (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_management.cpp b/src/client/rpc/ld_rpc_management.cpp index 4789f387f..4937d9f52 100644 --- a/src/client/rpc/ld_rpc_management.cpp +++ b/src/client/rpc/ld_rpc_management.cpp @@ -31,7 +31,7 @@ namespace rpc_send { bool get_fs_config() { auto endp = CTX->hosts2().at(CTX->local_host_id()); - fs_config::output out; + gkfs::rpc::fs_config::output out; try { CTX->log()->debug("{}() Retrieving file system configurations from daemon", __func__); @@ -40,7 +40,7 @@ bool get_fs_config() { // TODO(amiranda): hermes will eventually provide a post(endpoint) // returning one result and a broadcast(endpoint_set) returning a // result_set. 
When that happens we can remove the .at(0) :/ - out = ld_network_service->post(endp).get().at(0); + out = ld_network_service->post(endp).get().at(0); } catch (const std::exception& ex) { CTX->log()->error("{}() Retrieving fs configurations from daemon", __func__); return false; diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 9f37380f9..548f811db 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -44,7 +44,7 @@ int mk_node(const std::string& path, const mode_t mode) { // returning one result and a broadcast(endpoint_set) returning a // result_set. When that happens we can remove the .at(0) :/ auto out = - ld_network_service->post(endp, path, mode).get().at(0); + ld_network_service->post(endp, path, mode).get().at(0); err = out.err(); CTX->log()->debug("{}() Got response success: {}", __func__, err); -- GitLab From 3a17e07a5ddf9ed95b7a85e1e91f467cb00de839 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 13:56:47 +0200 Subject: [PATCH 09/71] stat RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 109 +++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 60 ++++++--------- 4 files changed, 133 insertions(+), 38 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 24d0721dc..aa1d0ea78 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -331,6 +331,115 @@ struct create { }; }; +//============================================================================== +// definitions for stat +struct stat { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = stat; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + 
using mercury_input_type = rpc_path_only_in_t; + using mercury_output_type = rpc_stat_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 1396244480; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::stat; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_path_only_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_stat_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path ) : + m_path(path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit + input(const rpc_path_only_in_t& other) : + m_path(other.path) { } + + explicit + operator rpc_path_only_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_db_val() {} + + output(int32_t err, const std::string& db_val) : + m_err(err), + m_db_val(db_val) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_stat_out_t& out) { + m_err = out.err; + m_db_val = out.db_val; + } + + int32_t + err() const { + return m_err; + } + + std::string + db_val() const { + return m_db_val; + } + + private: + int32_t m_err; + std::string m_db_val; + }; +}; + } // 
namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 4489018a2..47f93e6e3 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -192,6 +192,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_config_id = gkfs::rpc::fs_config::public_id; rpc_mk_node_id = gkfs::rpc::create::public_id; + rpc_stat_id = gkfs::rpc::stat::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 5452de31b..bad6bd98a 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -23,6 +23,7 @@ void register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 548f811db..0047670da 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -58,50 +58,34 @@ int mk_node(const std::string& path, const mode_t mode) { } int stat(const std::string& path, string& attr) { - hg_handle_t handle; - rpc_path_only_in_t in{}; - rpc_stat_out_t out{}; - int err = 0; - // fill in - in.path = path.c_str(); - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_stat_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret != HG_SUCCESS) { - errno = EBUSY; - CTX->log()->error("{}() timed out", __func__); - margo_destroy(handle); - return -1; - } - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with 
margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post(endp, path).get().at(0); + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } else { + attr = out.db_val(); + } + } catch(const std::exception& ex) { CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; - margo_free_output(handle, &out); - margo_destroy(handle); return -1; } - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - - if(out.err != 0) { - err = -1; - errno = out.err; - } else { - attr = out.db_val; - } - - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - margo_destroy(handle); - return err; + return 0; } int decr_size(const std::string& path, size_t length) { -- GitLab From 55da3cba5ba9d4bb7aa3c527e093f78aa24ad47f Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 16:58:11 +0200 Subject: [PATCH 10/71] remove RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 100 +++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 115 ++++++++++++++------------- 4 files changed, 160 insertions(+), 57 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index aa1d0ea78..752209bc6 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -440,6 +440,106 @@ struct stat { }; }; +//============================================================================== +// definitions for remove +struct remove { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = remove; + 
using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_rm_node_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 2549415936; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::remove; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_rm_node_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path) : + m_path(path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit + input(const rpc_rm_node_in_t& other) : + m_path(other.path) { } + + explicit + operator rpc_rm_node_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp 
b/src/client/preload.cpp index 47f93e6e3..79a9cc81f 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -193,6 +193,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_config_id = gkfs::rpc::fs_config::public_id; rpc_mk_node_id = gkfs::rpc::create::public_id; rpc_stat_id = gkfs::rpc::stat::public_id; + rpc_rm_node_id = gkfs::rpc::remove::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index bad6bd98a..9d4424a9c 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -24,6 +24,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 0047670da..1954d697b 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -137,70 +137,71 @@ int decr_size(const std::string& path, size_t length) { } int rm_node(const std::string& path, const bool remove_metadentry_only) { - hg_return_t ret; - int err = 0; // assume we succeed - // if metadentry should only removed only, send only 1 rpc to remove the metadata - // else send an rpc to all hosts and thus broadcast chunk_removal. - auto rpc_target_size = remove_metadentry_only ? 
static_cast(1) : CTX->hosts().size(); - - CTX->log()->debug("{}() Creating Mercury handles for all nodes ...", __func__); - vector rpc_handles(rpc_target_size); - vector rpc_waiters(rpc_target_size); - vector rpc_in(rpc_target_size); - // Send rpc to all nodes as all of them can have chunks for this path - for (size_t i = 0; i < rpc_target_size; i++) { - // fill in - rpc_in[i].path = path.c_str(); - // create handle - // if only the metadentry needs to removed send one rpc to metadentry's responsible node - if (remove_metadentry_only) - ret = margo_create_wrap(rpc_rm_node_id, path, rpc_handles[i]); - else - ret = margo_create_wrap_helper(rpc_rm_node_id, i, rpc_handles[i]); - if (ret != HG_SUCCESS) { - CTX->log()->warn("{}() Unable to create Mercury handle", __func__); - // We use continue here to remove at least some data - // XXX In the future we can discuss RPC retrying. This should be a function to be used in general - errno = EBUSY; - err = -1; - } - // send async rpc - ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->warn("{}() Unable to create Mercury handle", __func__); + + // if only the metadentry should be removed, send one rpc to the + // metadentry's responsible node to remove the metadata + // else, send an rpc to all hosts and thus broadcast chunk_removal. + if(remove_metadentry_only) { + + auto idx = CTX->distributor()->locate_file_metadata(path); + auto endp = CTX->hosts2().at(idx); + + try { + + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post(endp, path).get().at(0); + + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + assert(out.err() == 0); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; - err = -1; + return -1; } + + return 0; } - // Wait for RPC responses and then get response - for (size_t i = 0; i < rpc_target_size; i++) { - // XXX We might need a timeout here to not wait forever for an output that never comes? - ret = margo_wait(rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->warn("{}() Unable to wait for margo_request handle for path {} recipient {}", __func__, path, i); - errno = EBUSY; - err = -1; - } - rpc_err_out_t out{}; - ret = margo_get_output(rpc_handles[i], &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - if (err != 0) { - errno = out.err; - err = -1; + + std::size_t rpc_target_size2 = CTX->hosts2().size(); + std::vector> handles; + + hermes::endpoint_set endps; + + std::copy(CTX->hosts2().begin(), + CTX->hosts2().end(), + std::back_inserter(endps)); + + try { + + auto output_set = + ld_network_service->broadcast(endps, path).get(); + + // Wait for RPC responses and then get response + for (const auto& out : output_set) { + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; } - } else { - // something is wrong - errno = EBUSY; - err = -1; - CTX->log()->error("{}() while getting rpc output", __func__); } - /* clean up resources consumed by this rpc */ - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); + + return 0; + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); + errno = EBUSY; + return -1; } - return err; } -- GitLab From a1444c1efcde7592a0daa3ad4a80621660ba1e37 Mon Sep 17 
00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 17:31:32 +0200 Subject: [PATCH 11/71] Fix error converting Mercury structs to C++ objects --- include/client/rpc/hg_rpcs.hpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 752209bc6..112a0ba1b 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -152,8 +152,15 @@ struct fs_config { explicit output(const rpc_config_out_t& out) { - m_mountdir = out.mountdir; - m_rootdir = out.rootdir; + + if(out.mountdir != nullptr) { + m_mountdir = out.mountdir; + } + + if(out.rootdir != nullptr) { + m_rootdir = out.rootdir; + } + m_atime_state = out.atime_state; m_mtime_state = out.mtime_state; m_ctime_state = out.ctime_state; @@ -421,7 +428,10 @@ struct stat { explicit output(const rpc_stat_out_t& out) { m_err = out.err; - m_db_val = out.db_val; + + if(out.db_val != nullptr) { + m_db_val = out.db_val; + } } int32_t -- GitLab From 906123345b08daa9f69fd0681cd7dcb67c6b9788 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 18:38:24 +0200 Subject: [PATCH 12/71] decr_size RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 108 +++++++++++++++++++++++++++ src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 79 +++++++++----------- 3 files changed, 144 insertions(+), 44 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 112a0ba1b..b08cd3b7e 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -550,6 +550,114 @@ struct remove { }; }; +//============================================================================== +// definitions for decr_size +struct decr_size { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = decr_size; + 
using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_trunc_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 1291649024; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::decr_size; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_trunc_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, uint64_t length) : + m_path(path), + m_length(length) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + length() const { + return m_length; + } + + explicit + input(const rpc_trunc_in_t& other) : + m_path(other.path), + m_length(other.length) { } + + explicit + operator rpc_trunc_in_t() { + return {m_path.c_str(), m_length}; + } + + private: + std::string m_path; + uint64_t m_length; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + 
return m_err; + } + + private: + int32_t m_err; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 9d4424a9c..1f8e0aead 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -25,6 +25,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 1954d697b..6f1580bcc 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -76,9 +76,11 @@ int stat(const std::string& path, string& attr) { if(out.err() != 0) { errno = out.err(); return -1; - } else { - attr = out.db_val(); } + + attr = out.db_val(); + return 0; + } catch(const std::exception& ex) { CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; @@ -89,51 +91,36 @@ int stat(const std::string& path, string& attr) { } int decr_size(const std::string& path, size_t length) { - hg_handle_t handle; - rpc_trunc_in_t in{}; - int err = 0; - in.path = path.c_str(); - in.length = length; - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_decr_size_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() timed out", __func__); - margo_destroy(handle); - errno = EBUSY; - return -1; - } + try { - rpc_err_out_t out{}; - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see 
old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path, length).get().at(0); + + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } + + return 0; + + } catch(const std::exception& ex) { CTX->log()->error("{}() while getting rpc output", __func__); - margo_free_output(handle, &out); - margo_destroy(handle); errno = EBUSY; return -1; } - - CTX->log()->debug("{}() Got response: {}", __func__, out.err); - - if(out.err != 0){ - //In case of error out.err contains the - //corresponding value of errno - errno = out.err; - err = -1; - } - - margo_free_output(handle, &out); - margo_destroy(handle); - return err; } int rm_node(const std::string& path, const bool remove_metadentry_only) { @@ -143,8 +130,8 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { // else, send an rpc to all hosts and thus broadcast chunk_removal. 
if(remove_metadentry_only) { - auto idx = CTX->distributor()->locate_file_metadata(path); - auto endp = CTX->hosts2().at(idx); + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); try { @@ -159,7 +146,12 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); - assert(out.err() == 0); + if(out.err() != 0) { + errno = out.err(); + return -1; + } + + return 0; } catch(const std::exception& ex) { CTX->log()->error("{}() while getting rpc output", __func__); @@ -170,7 +162,6 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { return 0; } - std::size_t rpc_target_size2 = CTX->hosts2().size(); std::vector> handles; -- GitLab From 9575ad6f6f7db4bc5605160cac721baeff4599df Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 20:16:29 +0200 Subject: [PATCH 13/71] update_metadentry RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 260 +++++++++++++++++++++++++++ src/client/preload.cpp | 2 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 90 +++++----- 4 files changed, 306 insertions(+), 47 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index b08cd3b7e..0963c4bc6 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -658,6 +658,266 @@ struct decr_size { }; }; +//============================================================================== +// definitions for update_metadentry +struct update_metadentry { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = update_metadentry; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_update_metadentry_in_t; + using mercury_output_type = 
rpc_err_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 99483648; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::update_metadentry; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint64_t nlink, + uint32_t mode, + uint32_t uid, + uint32_t gid, + int64_t size, + int64_t blocks, + int64_t atime, + int64_t mtime, + int64_t ctime, + bool nlink_flag, + bool mode_flag, + bool size_flag, + bool block_flag, + bool atime_flag, + bool mtime_flag, + bool ctime_flag) : + m_path(path), + m_nlink(nlink), + m_mode(mode), + m_uid(uid), + m_gid(gid), + m_size(size), + m_blocks(blocks), + m_atime(atime), + m_mtime(mtime), + m_ctime(ctime), + m_nlink_flag(nlink_flag), + m_mode_flag(mode_flag), + m_size_flag(size_flag), + m_block_flag(block_flag), + m_atime_flag(atime_flag), + m_mtime_flag(mtime_flag), + m_ctime_flag(ctime_flag) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + nlink() const { + return m_nlink; + } + + uint32_t + mode() const { + return m_mode; + } + + uint32_t + uid() const { + return m_uid; + } + + uint32_t + gid() const { + return m_gid; + } + + int64_t + size() const { + return m_size; + } + + int64_t + blocks() const { + return m_blocks; + } + + int64_t + atime() 
const { + return m_atime; + } + + int64_t + mtime() const { + return m_mtime; + } + + int64_t + ctime() const { + return m_ctime; + } + + bool + nlink_flag() const { + return m_nlink_flag; + } + + bool + mode_flag() const { + return m_mode_flag; + } + + bool + size_flag() const { + return m_size_flag; + } + + bool + block_flag() const { + return m_block_flag; + } + + bool + atime_flag() const { + return m_atime_flag; + } + + bool + mtime_flag() const { + return m_mtime_flag; + } + + bool + ctime_flag() const { + return m_ctime_flag; + } + + explicit + input(const rpc_update_metadentry_in_t& other) : + m_path(other.path), + m_nlink(other.nlink), + m_mode(other.mode), + m_uid(other.uid), + m_gid(other.gid), + m_size(other.size), + m_blocks(other.blocks), + m_atime(other.atime), + m_mtime(other.mtime), + m_ctime(other.ctime), + m_nlink_flag(other.nlink_flag), + m_mode_flag(other.mode_flag), + m_size_flag(other.size_flag), + m_block_flag(other.block_flag), + m_atime_flag(other.atime_flag), + m_mtime_flag(other.mtime_flag), + m_ctime_flag(other.ctime_flag) { } + + explicit + operator rpc_update_metadentry_in_t() { + return {m_path.c_str(), + m_nlink, + m_mode, + m_uid, + m_gid, + m_size, + m_blocks, + m_atime, + m_mtime, + m_ctime, + m_nlink_flag, + m_mode_flag, + m_size_flag, + m_block_flag, + m_atime_flag, + m_mtime_flag, + m_ctime_flag}; + } + + private: + std::string m_path; + uint64_t m_nlink; + uint32_t m_mode; + uint32_t m_uid; + uint32_t m_gid; + int64_t m_size; + int64_t m_blocks; + int64_t m_atime; + int64_t m_mtime; + int64_t m_ctime; + bool m_nlink_flag; + bool m_mode_flag; + bool m_size_flag; + bool m_block_flag; + bool m_atime_flag; + bool m_mtime_flag; + bool m_ctime_flag; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& 
operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 79a9cc81f..4cdeec10a 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -194,6 +194,8 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_mk_node_id = gkfs::rpc::create::public_id; rpc_stat_id = gkfs::rpc::stat::public_id; rpc_rm_node_id = gkfs::rpc::remove::public_id; + rpc_decr_size_id = gkfs::rpc::decr_size::public_id; + rpc_update_metadentry_id = gkfs::rpc::update_metadentry::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 1f8e0aead..fcfdb787a 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -26,6 +26,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 6f1580bcc..0b87e16c0 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -162,7 +162,6 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { return 0; } - std::size_t rpc_target_size2 = CTX->hosts2().size(); std::vector> handles; hermes::endpoint_set endps; @@ -197,56 +196,53 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { int update_metadentry(const string& path, const Metadata& md, const MetadentryUpdateFlags& md_flags) { - hg_handle_t handle; - rpc_update_metadentry_in_t in{}; - rpc_err_out_t out{}; - int err = EUNKNOWN; - // fill in - // add data - in.path = path.c_str(); - in.size = md_flags.size ? 
md.size() : 0; - in.nlink = md_flags.link_count ? md.link_count() : 0; - in.blocks = md_flags.blocks ? md.blocks() : 0; - in.atime = md_flags.atime ? md.atime() : 0; - in.mtime = md_flags.mtime ? md.mtime() : 0; - in.ctime = md_flags.ctime ? md.ctime() : 0; - // add data flags - in.size_flag = bool_to_merc_bool(md_flags.size); - in.nlink_flag = bool_to_merc_bool(md_flags.link_count); - in.block_flag = bool_to_merc_bool(md_flags.blocks); - in.atime_flag = bool_to_merc_bool(md_flags.atime); - in.mtime_flag = bool_to_merc_bool(md_flags.mtime); - in.ctime_flag = bool_to_merc_bool(md_flags.ctime); - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_update_metadentry_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - } else { - // something is wrong - errno = EBUSY; - CTX->log()->error("{}() while getting rpc output", __func__); + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, + path, + (md_flags.link_count ? md.link_count() : 0), + /* mode */ 0, + /* uid */ 0, + /* gid */ 0, + (md_flags.size ? md.size() : 0), + (md_flags.blocks ? md.blocks() : 0), + (md_flags.atime ? md.atime() : 0), + (md_flags.mtime ? 
md.mtime() : 0), + (md_flags.ctime ? md.ctime() : 0), + bool_to_merc_bool(md_flags.link_count), + /* mode_flag */ false, + bool_to_merc_bool(md_flags.size), + bool_to_merc_bool(md_flags.blocks), + bool_to_merc_bool(md_flags.atime), + bool_to_merc_bool(md_flags.mtime), + bool_to_merc_bool(md_flags.ctime)).get().at(0); + + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out", __func__); + + return 0; + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; + return -1; } - - margo_destroy(handle); - return err; } int update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag, -- GitLab From 086e03faf23b27c55c62e40baacd2df84ea74a11 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 6 Sep 2019 20:40:21 +0200 Subject: [PATCH 14/71] get_metadentry_size RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 108 +++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 58 ++++++-------- 4 files changed, 134 insertions(+), 34 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 0963c4bc6..b8e93a57e 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -918,6 +918,114 @@ struct update_metadentry { }; }; +//============================================================================== +// definitions for get_metadentry_size +struct get_metadentry_size { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = get_metadentry_size; + using handle_type = 
hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_path_only_in_t; + using mercury_output_type = rpc_get_metadentry_size_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 3426484224; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::get_metadentry_size; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_path_only_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path ) : + m_path(path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + explicit + input(const rpc_path_only_in_t& other) : + m_path(other.path) { } + + explicit + operator rpc_path_only_in_t() { + return {m_path.c_str()}; + } + + private: + std::string m_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_ret_size() {} + + output(int32_t err, int64_t ret_size) : + m_err(err), + m_ret_size(ret_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_get_metadentry_size_out_t& out) { + m_err = out.err; + m_ret_size = out.ret_size; + } + + int32_t + err() const { + return m_err; + } + + 
int64_t + ret_size() const { + return m_ret_size; + } + + private: + int32_t m_err; + int64_t m_ret_size; + }; +}; } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 4cdeec10a..b11ff09b9 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -196,6 +196,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_rm_node_id = gkfs::rpc::remove::public_id; rpc_decr_size_id = gkfs::rpc::decr_size::public_id; rpc_update_metadentry_id = gkfs::rpc::update_metadentry::public_id; + rpc_get_metadentry_size_id = gkfs::rpc::get_metadentry_size::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index fcfdb787a..848559195 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -27,6 +27,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 0b87e16c0..61f97c139 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -297,43 +297,33 @@ int update_metadentry_size(const string& path, const size_t size, const off64_t } int get_metadentry_size(const std::string& path, off64_t& ret_size) { - hg_handle_t handle; - rpc_path_only_in_t in{}; - rpc_get_metadentry_size_out_t out{}; - // add data - in.path = path.c_str(); - int err = EUNKNOWN; - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_get_metadentry_size_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == 
HG_SUCCESS) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err); - err = out.err; - ret_size = out.ret_size; - } else { - // something is wrong - errno = EBUSY; - ret_size = 0; - CTX->log()->error("{}() while getting rpc output", __func__); - } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out", __func__); + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path).get().at(0); + + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + ret_size = out.ret_size(); + return out.err(); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; + ret_size = 0; + return EUNKNOWN; } - margo_destroy(handle); - return err; } /** -- GitLab From 30a8922807c6dc02b5fc4674ee9888fdcd5623c9 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sun, 8 Sep 2019 12:58:11 +0200 Subject: [PATCH 15/71] update_metadentry_size RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 137 +++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 74 ++++++--------- 4 files changed, 170 insertions(+), 43 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index b8e93a57e..33ca8810c 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1026,6 +1026,143 @@ struct get_metadentry_size { int64_t 
m_ret_size; }; }; + +//============================================================================== +// definitions for update_metadentry_size +struct update_metadentry_size { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = update_metadentry_size; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_update_metadentry_size_in_t; + using mercury_output_type = rpc_update_metadentry_size_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 2760900608; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::update_metadentry_size; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_size_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_update_metadentry_size_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint64_t size, + int64_t offset, + bool append) : + m_path(path), + m_size(size), + m_offset(offset), + m_append(append) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + size() const { + return m_size; + } + + int64_t + offset() const { + return m_offset; + } + + bool + append() const { + return m_append; + } + + explicit + input(const rpc_update_metadentry_size_in_t& 
other) : + m_path(other.path), + m_size(other.size), + m_offset(other.offset), + m_append(other.append) { } + + explicit + operator rpc_update_metadentry_size_in_t() { + return {m_path.c_str(), m_size, m_offset, m_append}; + } + + private: + std::string m_path; + uint64_t m_size; + int64_t m_offset; + bool m_append; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_ret_size() {} + + output(int32_t err, int64_t ret_size) : + m_err(err), + m_ret_size(ret_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_update_metadentry_size_out_t& out) { + m_err = out.err; + m_ret_size = out.ret_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + ret_size() const { + return m_ret_size; + } + + private: + int32_t m_err; + int64_t m_ret_size; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index b11ff09b9..5ab324f40 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -197,6 +197,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_decr_size_id = gkfs::rpc::decr_size::public_id; rpc_update_metadentry_id = gkfs::rpc::update_metadentry::public_id; rpc_get_metadentry_size_id = gkfs::rpc::get_metadentry_size::public_id; + rpc_update_metadentry_size_id = gkfs::rpc::update_metadentry::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 848559195..86767e3d3 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -28,6 +28,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } }} // namespace hermes::detail diff --git 
a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 61f97c139..164097240 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -247,53 +247,41 @@ int update_metadentry(const string& path, const Metadata& md, const MetadentryUp int update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag, off64_t& ret_size) { - hg_handle_t handle; - rpc_update_metadentry_size_in_t in{}; - rpc_update_metadentry_size_out_t out{}; - // add data - in.path = path.c_str(); - in.size = size; - in.offset = offset; - if (append_flag) - in.append = HG_TRUE; - else - in.append = HG_FALSE; - int err = EUNKNOWN; - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_update_metadentry_size_id, path, handle); - if (ret != HG_SUCCESS) { - ret_size = 0; - errno = EBUSY; - margo_destroy(handle); - return -1; - } - // Send rpc - ret = margo_forward_timed_wrap(handle, &in); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() margo forward failed: {}", __func__, HG_Error_to_string(ret)); - ret_size = 0; - errno = EBUSY; - margo_destroy(handle); - return -1; - } + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); - ret = margo_get_output(handle, &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() failed to get rpc ouptut: {}", __func__, HG_Error_to_string(ret)); - ret_size = 0; - errno = EBUSY; - margo_free_output(handle, &out); - margo_destroy(handle); - } + try { - CTX->log()->debug("{}() Got response: {}", __func__, out.err); - err = out.err; - ret_size = out.ret_size; + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // 
result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path, size, offset, + bool_to_merc_bool(append_flag)).get().at(0); - margo_free_output(handle, &out); - margo_destroy(handle); - return err; + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; + } + + ret_size = out.ret_size(); + return out.err(); + + return 0; + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); + errno = EBUSY; + ret_size = 0; + return EUNKNOWN; + } } int get_metadentry_size(const std::string& path, off64_t& ret_size) { -- GitLab From f0e87d939f0a05db64563106c89e7d982da25e6a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sun, 8 Sep 2019 13:26:43 +0200 Subject: [PATCH 16/71] mk_symlink RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 113 +++++++++++++++++++++++++++ src/client/preload.cpp | 4 + src/client/rpc/hg_rpcs.cpp | 6 ++ src/client/rpc/ld_rpc_metadentry.cpp | 61 +++++++-------- 4 files changed, 150 insertions(+), 34 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 33ca8810c..515c9ef79 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1163,6 +1163,119 @@ struct update_metadentry_size { }; }; +#ifdef HAS_SYMLINKS + +//============================================================================== +// definitions for mk_symlink +struct mk_symlink { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = mk_symlink; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_mk_symlink_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + constexpr static const uint64_t 
public_id = 3207004160; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::mk_symlink; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_mk_symlink_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + const std::string& target_path) : + m_path(path), + m_target_path(target_path) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + std::string + target_path() const { + return m_target_path; + } + + explicit + input(const rpc_mk_symlink_in_t& other) : + m_path(other.path), + m_target_path(other.target_path) { } + + explicit + operator rpc_mk_symlink_in_t() { + return {m_path.c_str(), m_target_path.c_str()}; + } + + private: + std::string m_path; + std::string m_target_path; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + +#endif // HAS_SYMLINKS + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp 
b/src/client/preload.cpp index 5ab324f40..d91de9160 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -199,6 +199,10 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_get_metadentry_size_id = gkfs::rpc::get_metadentry_size::public_id; rpc_update_metadentry_size_id = gkfs::rpc::update_metadentry::public_id; +#ifdef HAS_SYMLINKS + rpc_mk_symlink_id = gkfs::rpc::mk_symlink::public_id; +#endif // HAS_SYMLINKS + return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 86767e3d3..894ef8a9b 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -29,6 +29,12 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + +#ifdef HAS_SYMLINKS + (void) registered_requests().add(); +#endif // HAS_SYMLINKS + + } }} // namespace hermes::detail diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 164097240..b441dcf25 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -416,43 +416,36 @@ void get_dirents(OpenDir& open_dir){ #ifdef HAS_SYMLINKS int mk_symlink(const std::string& path, const std::string& target_path) { - hg_handle_t handle; - rpc_mk_symlink_in_t in{}; - rpc_err_out_t out{}; - int err = EUNKNOWN; - // fill in - in.path = path.c_str(); - in.target_path = target_path.c_str(); - // Create handle - CTX->log()->debug("{}() Creating Mercury handle ...", __func__); - auto ret = margo_create_wrap(rpc_mk_symlink_id, path, handle); - if (ret != HG_SUCCESS) { - errno = EBUSY; - return -1; - } - // Send rpc - CTX->log()->debug("{}() About to send RPC ...", __func__); - ret = margo_forward_timed_wrap(handle, &in); - // Get response - if (ret == HG_SUCCESS) { - CTX->log()->trace("{}() Waiting for response", __func__); - ret = margo_get_output(handle, &out); - if (ret == HG_SUCCESS) { - CTX->log()->debug("{}() Got response 
success: {}", __func__, out.err); - err = out.err; - } else { - // something is wrong - errno = EBUSY; - CTX->log()->error("{}() while getting rpc output", __func__); + + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + CTX->log()->debug("{}() Sending RPC ...", __func__); + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can + // retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + auto out = + ld_network_service->post( + endp, path, target_path).get().at(0); + + CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + + if(out.err() != 0) { + errno = out.err(); + return -1; } - /* clean up resources consumed by this rpc */ - margo_free_output(handle, &out); - } else { - CTX->log()->warn("{}() timed out"); + + return 0; + + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); errno = EBUSY; + return -1; } - margo_destroy(handle); - return err; } #endif -- GitLab From 5dfc9820fbd96eaf7e314842dcf12360e1b3a5f9 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 09:44:57 +0200 Subject: [PATCH 17/71] write_data RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 191 +++++++++++++++++++++++++++ src/client/preload.cpp | 2 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_data_ws.cpp | 209 ++++++++++++++++++------------ 4 files changed, 322 insertions(+), 81 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 515c9ef79..3a13cd0c2 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1276,6 +1276,197 @@ struct mk_symlink { #endif // HAS_SYMLINKS +//============================================================================== +// 
definitions for write_data +struct write_data { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = write_data; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_write_data_in_t; + using mercury_output_type = rpc_data_out_t; + + // RPC public identifier + constexpr static const uint64_t public_id = 3716481024; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::write_data; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_write_data_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_data_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + int64_t offset, + uint64_t host_id, + uint64_t host_size, + uint64_t chunk_n, + uint64_t chunk_start, + uint64_t chunk_end, + uint64_t total_chunk_size, + const hermes::exposed_memory& buffers) : + m_path(path), + m_offset(offset), + m_host_id(host_id), + m_host_size(host_size), + m_chunk_n(chunk_n), + m_chunk_start(chunk_start), + m_chunk_end(chunk_end), + m_total_chunk_size(total_chunk_size), + m_buffers(buffers) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int64_t + offset() const { + return m_offset; + } + + uint64_t + host_id() const { + return m_host_id; + } + + uint64_t + 
host_size() const { + return m_host_size; + } + + uint64_t + chunk_n() const { + return m_chunk_n; + } + + uint64_t + chunk_start() const { + return m_chunk_start; + } + + uint64_t + chunk_end() const { + return m_chunk_end; + } + + uint64_t + total_chunk_size() const { + return m_total_chunk_size; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit + input(const rpc_write_data_in_t& other) : + m_path(other.path), + m_offset(other.offset), + m_host_id(other.host_id), + m_host_size(other.host_size), + m_chunk_n(other.chunk_n), + m_chunk_start(other.chunk_start), + m_chunk_end(other.chunk_end), + m_total_chunk_size(other.total_chunk_size), + m_buffers(other.bulk_handle) { } + + explicit + operator rpc_write_data_in_t() { + return { + m_path.c_str(), + m_offset, + m_host_id, + m_host_size, + m_chunk_n, + m_chunk_start, + m_chunk_end, + m_total_chunk_size, + hg_bulk_t(m_buffers) + }; + } + + private: + std::string m_path; + int64_t m_offset; + uint64_t m_host_id; + uint64_t m_host_size; + uint64_t m_chunk_n; + uint64_t m_chunk_start; + uint64_t m_chunk_end; + uint64_t m_total_chunk_size; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_io_size() {} + + output(int32_t err, size_t io_size) : + m_err(err), + m_io_size(io_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_data_out_t& out) { + m_err = out.err; + m_io_size = out.io_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + io_size() const { + return m_io_size; + } + + private: + int32_t m_err; + size_t m_io_size; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index d91de9160..bc8883098 100644 --- 
a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -203,6 +203,8 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_mk_symlink_id = gkfs::rpc::mk_symlink::public_id; #endif // HAS_SYMLINKS + rpc_write_data_id = gkfs::rpc::write_data::public_id; + return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 894ef8a9b..a8dc2d9f0 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -34,6 +34,7 @@ register_user_request_types() { (void) registered_requests().add(); #endif // HAS_SYMLINKS + (void) registered_requests().add(); } diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index 5b13c7dd0..7651a67ea 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -17,6 +17,7 @@ #include "global/rpc/rpc_types.hpp" #include #include +#include #include @@ -32,122 +33,168 @@ using namespace std; /** * Sends an RPC request to a specific node to pull all chunks that belong to him */ -ssize_t write(const string& path, const void* buf, const bool append_flag, const off64_t in_offset, - const size_t write_size, const int64_t updated_metadentry_size) { +ssize_t write(const string& path, const void* buf, const bool append_flag, + const off64_t in_offset, const size_t write_size, + const int64_t updated_metadentry_size) { + assert(write_size > 0); - // Calculate chunkid boundaries and numbers so that daemons know in which interval to look for chunks - off64_t offset = in_offset; - if (append_flag) - offset = updated_metadentry_size - write_size; + + // Calculate chunkid boundaries and numbers so that daemons know in + // which interval to look for chunks + off64_t offset = append_flag ? 
+ in_offset : + (updated_metadentry_size - write_size); auto chnk_start = chnk_id_for_offset(offset, CHUNKSIZE); auto chnk_end = chnk_id_for_offset((offset + write_size) - 1, CHUNKSIZE); - // Collect all chunk ids within count that have the same destination so that those are send in one rpc bulk transfer - map> target_chnks{}; - // contains the target ids, used to access the target_chnks map. First idx is chunk with potential offset - vector targets{}; + // Collect all chunk ids within count that have the same destination so + // that those are send in one rpc bulk transfer + std::map> target_chnks{}; + // contains the target ids, used to access the target_chnks map. + // First idx is chunk with potential offset + std::vector targets{}; + // targets for the first and last chunk as they need special treatment uint64_t chnk_start_target = 0; uint64_t chnk_end_target = 0; + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { auto target = CTX->distributor()->locate_data(path, chnk_id); + if (target_chnks.count(target) == 0) { - target_chnks.insert(make_pair(target, vector{chnk_id})); + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); targets.push_back(target); - } else + } else { target_chnks[target].push_back(chnk_id); + } + // set first and last chnk targets - if (chnk_id == chnk_start) + if (chnk_id == chnk_start) { chnk_start_target = target; - if (chnk_id == chnk_end) + } + + if (chnk_id == chnk_end) { chnk_end_target = target; + } } + // some helper variables for async RPC - auto target_n = targets.size(); - vector rpc_handles(target_n); - vector rpc_waiters(target_n); - vector rpc_in(target_n); - // register local target buffer for bulk access for margo instance - auto bulk_buf = const_cast(buf); - hg_bulk_t rpc_bulk_handle = nullptr; - auto size = make_shared(write_size); - auto ret = margo_bulk_create(ld_margo_rpc_id, 1, &bulk_buf, size.get(), HG_BULK_READ_ONLY, &rpc_bulk_handle); - if (ret != HG_SUCCESS) { - 
CTX->log()->error("{}() Failed to create rpc bulk handle", __func__); + + std::vector bufseq{ + hermes::mutable_buffer{const_cast(buf), write_size}, + }; + + hermes::exposed_memory local_buffers; + + try { + local_buffers = + ld_network_service->expose(bufseq, hermes::access_mode::read_only); + + } catch (const std::exception& ex) { + CTX->log()->error("{}() Failed to expose buffers for RMA", __func__); errno = EBUSY; return -1; } + std::vector> handles; + // Issue non-blocking RPC requests and wait for the result later - for (uint64_t i = 0; i < target_n; i++) { - auto target = targets[i]; - auto total_chunk_size = target_chnks[target].size() * CHUNKSIZE; // total chunk_size for target - if (target == chnk_start_target) // receiver of first chunk must subtract the offset from first chunk + for(const auto& target : targets) { + + // total chunk_size for target + auto total_chunk_size = target_chnks[target].size() * CHUNKSIZE; + + // receiver of first chunk must subtract the offset from first chunk + if (target == chnk_start_target) { total_chunk_size -= chnk_lpad(offset, CHUNKSIZE); - if (target == chnk_end_target) // receiver of last chunk must subtract + } + + // receiver of last chunk must subtract + if (target == chnk_end_target) { total_chunk_size -= chnk_rpad(offset + write_size, CHUNKSIZE); - // Fill RPC input - rpc_in[i].path = path.c_str(); - rpc_in[i].host_id = target; - rpc_in[i].host_size = CTX->hosts().size(); - rpc_in[i].offset = chnk_lpad(offset, CHUNKSIZE);// first offset in targets is the chunk with a potential offset - rpc_in[i].chunk_n = target_chnks[target].size(); // number of chunks handled by that destination - rpc_in[i].chunk_start = chnk_start; // chunk start id of this write - rpc_in[i].chunk_end = chnk_end; // chunk end id of this write - rpc_in[i].total_chunk_size = total_chunk_size; // total size to write - rpc_in[i].bulk_handle = rpc_bulk_handle; - margo_create_wrap_helper(rpc_write_data_id, target, rpc_handles[i]); - // Send RPC - 
CTX->log()->trace("{}() host: {}, path: {}, chunks: {}, size: {}, offset: {}", __func__, - target, path, rpc_in[i].chunk_n, total_chunk_size, rpc_in[i].offset); - ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking rpc for path {} and recipient {}", __func__, path, - target); + } + + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + CTX->log()->debug("{}() Sending RPC ...", __func__); + + gkfs::rpc::write_data::input in( + path, + // first offset in targets is the chunk with + // a potential offset + chnk_lpad(offset, CHUNKSIZE), + target, + CTX->hosts2().size(), + // number of chunks handled by that destination + target_chnks[target].size(), + // chunk start id of this write + chnk_start, + // chunk end id of this write + chnk_end, + // total size to write + total_chunk_size, + local_buffers); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. 
When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + CTX->log()->trace("{}() host: {}, path: {}, chunks: {}, size: {}, " + "offset: {}", __func__, + target, path, in.chunk_n(), + total_chunk_size, in.offset()); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() Unable to send non-blocking rpc for " + "path {} and recipient {}", __func__, path, + target); errno = EBUSY; - for (uint64_t j = 0; j < i + 1; j++) { - margo_destroy(rpc_handles[j]); - } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); return -1; } } - // Wait for RPC responses and then get response and add it to out_size which is the written size - // All potential outputs are served to free resources regardless of errors, although an errorcode is set. - ssize_t out_size = 0; + // Wait for RPC responses and then get response and add it to out_size + // which is the written size All potential outputs are served to free + // resources regardless of errors, although an errorcode is set. bool error = false; - for (unsigned int i = 0; i < target_n; i++) { - // XXX We might need a timeout here to not wait forever for an output that never comes? - ret = margo_wait(rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to wait for margo_request handle for path {} recipient {}", __func__, path, - targets[i]); - error = true; - errno = EBUSY; - } - // decode response - rpc_data_out_t out{}; - ret = margo_get_output(rpc_handles[i], &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to get rpc output for path {} recipient {}", __func__, path, targets[i]); + ssize_t out_size = 0; + std::size_t idx = 0; + + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? 
+ auto out = h.get().at(0); + + if(out.err() != 0) { + CTX->log()->error("{}() Daemon reported error: {}", + __func__, out.err()); + error = true; + errno = out.err(); + } + + out_size += static_cast(out.io_size()); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() Failed to get rpc output for path {} " + "recipient {}", __func__, path, targets[idx]); error = true; errno = EIO; } - if (out.err != 0) { - CTX->log()->error("{}() Daemon reported error: {}", __func__, out.err); - error = true; - errno = out.err; - } - out_size += static_cast(out.io_size); - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); + + ++idx; } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); - return (error) ? -1 : out_size; + + return error ? -1 : out_size; } /** -- GitLab From 105158f2cf65ea139171874879e1ff99d12d1080 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 11:10:00 +0200 Subject: [PATCH 18/71] Fix HG_address leak due to wrong destructor order --- include/client/preload_context.hpp | 2 ++ src/client/preload.cpp | 4 ++++ src/client/preload_context.cpp | 5 +++++ src/client/preload_util.cpp | 4 ++++ src/client/rpc/ld_rpc_data_ws.cpp | 4 ++++ 5 files changed, 19 insertions(+) diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 96b0d8ef7..345a61d2b 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -96,6 +96,8 @@ class PreloadContext { const std::vector& hosts2() const; void hosts2(const std::vector& addrs); + void clear_hosts(); + uint64_t local_host_id() const; void local_host_id(uint64_t id); diff --git a/src/client/preload.cpp b/src/client/preload.cpp index bc8883098..045be5997 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -308,6 +308,10 @@ void destroy_preload() { return; } cleanup_addresses(); + + CTX->log()->debug("{}() About to finalize the Hermes RPC client", __func__); + ld_network_service.reset(); 
+ CTX->log()->debug("{}() About to finalize the margo RPC client", __func__); // XXX Sometimes this hangs on the cluster. Investigate. margo_finalize(ld_margo_rpc_id); diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 530f665f7..7e466298e 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -66,6 +66,7 @@ const std::vector& PreloadContext::hosts() const { void PreloadContext::hosts(const std::vector& addrs) { hosts_ = addrs; } + #endif const std::vector& PreloadContext::hosts2() const { @@ -76,6 +77,10 @@ void PreloadContext::hosts2(const std::vector& endpoints) { hosts2_ = endpoints; } +void PreloadContext::clear_hosts() { + hosts2_.clear(); +} + uint64_t PreloadContext::local_host_id() const { return local_host_id_; } diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 5a5b303ca..ac7b72b59 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -259,9 +259,13 @@ void load_hosts() { } void cleanup_addresses() { +#if 1 //TODO(amiranda) remove for (auto& addr: CTX->hosts()) { margo_addr_free(ld_margo_rpc_id, addr); } +#endif + + CTX->clear_hosts(); } diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index 7651a67ea..7bdfe5d84 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -101,6 +101,10 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, std::vector> handles; // Issue non-blocking RPC requests and wait for the result later + // + // TODO(amiranda): This could be simplified by adding a vector of inputs + // to async_engine::broadcast(). 
This would allow us to avoid manually + // looping over handles as we do below for(const auto& target : targets) { // total chunk_size for target -- GitLab From e3fc9839e1f871a4275efe24b7fb10b0280e7f29 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 12:00:56 +0200 Subject: [PATCH 19/71] read_data RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 213 ++++++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_data_ws.cpp | 204 +++++++++++++++++----------- 4 files changed, 343 insertions(+), 76 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 3a13cd0c2..175a37e15 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -69,6 +69,8 @@ struct fs_config { using mercury_output_type = rpc_config_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 3033006080; // RPC internal Mercury identifier @@ -246,6 +248,8 @@ struct create { using mercury_output_type = rpc_err_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 796590080; // RPC internal Mercury identifier @@ -355,6 +359,8 @@ struct stat { using mercury_output_type = rpc_stat_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 1396244480; // RPC internal Mercury identifier @@ -467,6 +473,8 @@ struct remove { using mercury_output_type = rpc_err_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 2549415936; // RPC internal Mercury identifier @@ -567,6 +575,8 @@ 
struct decr_size { using mercury_output_type = rpc_err_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 1291649024; // RPC internal Mercury identifier @@ -675,6 +685,8 @@ struct update_metadentry { using mercury_output_type = rpc_err_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 99483648; // RPC internal Mercury identifier @@ -935,6 +947,8 @@ struct get_metadentry_size { using mercury_output_type = rpc_get_metadentry_size_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 3426484224; // RPC internal Mercury identifier @@ -1044,6 +1058,8 @@ struct update_metadentry_size { using mercury_output_type = rpc_update_metadentry_size_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 2760900608; // RPC internal Mercury identifier @@ -1182,6 +1198,8 @@ struct mk_symlink { using mercury_output_type = rpc_err_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 3207004160; // RPC internal Mercury identifier @@ -1293,6 +1311,8 @@ struct write_data { using mercury_output_type = rpc_data_out_t; // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) constexpr static const uint64_t public_id = 3716481024; // RPC internal Mercury identifier @@ -1467,6 +1487,199 @@ struct write_data { }; }; +//============================================================================== +// definitions for 
read_data +struct read_data { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = read_data; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_read_data_in_t; + using mercury_output_type = rpc_data_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 892207104; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::read_data; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_read_data_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_data_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + int64_t offset, + uint64_t host_id, + uint64_t host_size, + uint64_t chunk_n, + uint64_t chunk_start, + uint64_t chunk_end, + uint64_t total_chunk_size, + const hermes::exposed_memory& buffers) : + m_path(path), + m_offset(offset), + m_host_id(host_id), + m_host_size(host_size), + m_chunk_n(chunk_n), + m_chunk_start(chunk_start), + m_chunk_end(chunk_end), + m_total_chunk_size(total_chunk_size), + m_buffers(buffers) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + int64_t + offset() const { + return m_offset; + } + + 
uint64_t + host_id() const { + return m_host_id; + } + + uint64_t + host_size() const { + return m_host_size; + } + + uint64_t + chunk_n() const { + return m_chunk_n; + } + + uint64_t + chunk_start() const { + return m_chunk_start; + } + + uint64_t + chunk_end() const { + return m_chunk_end; + } + + uint64_t + total_chunk_size() const { + return m_total_chunk_size; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit + input(const rpc_read_data_in_t& other) : + m_path(other.path), + m_offset(other.offset), + m_host_id(other.host_id), + m_host_size(other.host_size), + m_chunk_n(other.chunk_n), + m_chunk_start(other.chunk_start), + m_chunk_end(other.chunk_end), + m_total_chunk_size(other.total_chunk_size), + m_buffers(other.bulk_handle) { } + + explicit + operator rpc_read_data_in_t() { + return { + m_path.c_str(), + m_offset, + m_host_id, + m_host_size, + m_chunk_n, + m_chunk_start, + m_chunk_end, + m_total_chunk_size, + hg_bulk_t(m_buffers) + }; + } + + private: + std::string m_path; + int64_t m_offset; + uint64_t m_host_id; + uint64_t m_host_size; + uint64_t m_chunk_n; + uint64_t m_chunk_start; + uint64_t m_chunk_end; + uint64_t m_total_chunk_size; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_io_size() {} + + output(int32_t err, size_t io_size) : + m_err(err), + m_io_size(io_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_data_out_t& out) { + m_err = out.err; + m_io_size = out.io_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + io_size() const { + return m_io_size; + } + + private: + int32_t m_err; + size_t m_io_size; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp 
b/src/client/preload.cpp index 045be5997..d7b6e94f0 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -204,6 +204,7 @@ bool init_hermes_client(const std::string& transport_prefix) { #endif // HAS_SYMLINKS rpc_write_data_id = gkfs::rpc::write_data::public_id; + rpc_read_data_id = gkfs::rpc::read_data::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index a8dc2d9f0..80aaab5ce 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -35,6 +35,7 @@ register_user_request_types() { #endif // HAS_SYMLINKS (void) registered_requests().add(); + (void) registered_requests().add(); } diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index 7bdfe5d84..c7d6a8a62 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -81,11 +81,12 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, } // some helper variables for async RPC - std::vector bufseq{ hermes::mutable_buffer{const_cast(buf), write_size}, }; + // expose user buffers so that they can serve as RDMA data sources + // (these are automatically "unexposed" when the destructor is called) hermes::exposed_memory local_buffers; try { @@ -205,113 +206,164 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, * Sends an RPC request to a specific node to push all chunks that belong to him */ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t read_size) { - // Calculate chunkid boundaries and numbers so that daemons know in which interval to look for chunks - auto chnk_start = chnk_id_for_offset(offset, CHUNKSIZE); // first chunk number + + // Calculate chunkid boundaries and numbers so that daemons know in which + // interval to look for chunks + auto chnk_start = chnk_id_for_offset(offset, CHUNKSIZE); auto chnk_end = chnk_id_for_offset((offset + read_size - 1), CHUNKSIZE); - // Collect all chunk ids within 
count that have the same destination so that those are send in one rpc bulk transfer - map> target_chnks{}; - // contains the recipient ids, used to access the target_chnks map. First idx is chunk with potential offset - vector targets{}; + // Collect all chunk ids within count that have the same destination so + // that those are send in one rpc bulk transfer + std::map> target_chnks{}; + // contains the recipient ids, used to access the target_chnks map. + // First idx is chunk with potential offset + std::vector targets{}; + // targets for the first and last chunk as they need special treatment uint64_t chnk_start_target = 0; uint64_t chnk_end_target = 0; + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { auto target = CTX->distributor()->locate_data(path, chnk_id); + if (target_chnks.count(target) == 0) { - target_chnks.insert(make_pair(target, vector{chnk_id})); + target_chnks.insert( + std::make_pair(target, std::vector{chnk_id})); targets.push_back(target); - } else + } else { target_chnks[target].push_back(chnk_id); + } + // set first and last chnk targets - if (chnk_id == chnk_start) + if (chnk_id == chnk_start) { chnk_start_target = target; - if (chnk_id == chnk_end) + } + + if (chnk_id == chnk_end) { chnk_end_target = target; + } } - // some helper variables for async RPC - auto target_n = targets.size(); - vector rpc_handles(target_n); - vector rpc_waiters(target_n); - vector rpc_in(target_n); - // register local target buffer for bulk access for margo instance - auto bulk_buf = buf; - hg_bulk_t rpc_bulk_handle = nullptr; - auto size = make_shared(read_size); - auto ret = margo_bulk_create(ld_margo_rpc_id, 1, &bulk_buf, size.get(), HG_BULK_WRITE_ONLY, &rpc_bulk_handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to create rpc bulk handle", __func__); + + // some helper variables for async RPCs + std::vector bufseq{ + hermes::mutable_buffer{buf, read_size}, + }; + + // expose user buffers so that they can serve as 
RDMA data targets + // (these are automatically "unexposed" when the destructor is called) + hermes::exposed_memory local_buffers; + + try { + local_buffers = + ld_network_service->expose(bufseq, hermes::access_mode::write_only); + + } catch (const std::exception& ex) { + CTX->log()->error("{}() Failed to expose buffers for RMA", __func__); errno = EBUSY; return -1; } + + std::vector> handles; + // Issue non-blocking RPC requests and wait for the result later - for (unsigned int i = 0; i < target_n; i++) { - auto target = targets[i]; + // + // TODO(amiranda): This could be simplified by adding a vector of inputs + // to async_engine::broadcast(). This would allow us to avoid manually + // looping over handles as we do below + for(const auto& target : targets) { + + // total chunk_size for target auto total_chunk_size = target_chnks[target].size() * CHUNKSIZE; - if (target == chnk_start_target) // receiver of first chunk must subtract the offset from first chunk + + // receiver of first chunk must subtract the offset from first chunk + if (target == chnk_start_target) { total_chunk_size -= chnk_lpad(offset, CHUNKSIZE); - if (target == chnk_end_target) // receiver of last chunk must subtract + } + + // receiver of last chunk must subtract + if (target == chnk_end_target) { total_chunk_size -= chnk_rpad(offset + read_size, CHUNKSIZE); + } - // Fill RPC input - rpc_in[i].path = path.c_str(); - rpc_in[i].host_id = target; - rpc_in[i].host_size = CTX->hosts().size(); - rpc_in[i].offset = chnk_lpad(offset, CHUNKSIZE);// first offset in targets is the chunk with a potential offset - rpc_in[i].chunk_n = target_chnks[target].size(); // number of chunks handled by that destination - rpc_in[i].chunk_start = chnk_start; // chunk start id of this write - rpc_in[i].chunk_end = chnk_end; // chunk end id of this write - rpc_in[i].total_chunk_size = total_chunk_size; // total size to write - rpc_in[i].bulk_handle = rpc_bulk_handle; - margo_create_wrap_helper(rpc_read_data_id, 
target, rpc_handles[i]); - // Send RPC - ret = margo_iforward(rpc_handles[i], &rpc_in[i], &rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking rpc for path {} and recipient {}", __func__, path, - target); + auto endp = CTX->hosts2().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + + CTX->log()->debug("{}() Sending RPC ...", __func__); + + gkfs::rpc::read_data::input in( + path, + // first offset in targets is the chunk with + // a potential offset + chnk_lpad(offset, CHUNKSIZE), + target, + CTX->hosts2().size(), + // number of chunks handled by that destination + target_chnks[target].size(), + // chunk start id of this write + chnk_start, + // chunk end id of this write + chnk_end, + // total size to write + total_chunk_size, + local_buffers); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + CTX->log()->trace("{}() host: {}, path: {}, chunks: {}, size: {}, " + "offset: {}", __func__, + target, path, in.chunk_n(), + total_chunk_size, in.offset()); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() Unable to send non-blocking rpc for " + "path {} and recipient {}", __func__, path, + target); errno = EBUSY; - for (uint64_t j = 0; j < i + 1; j++) { - margo_destroy(rpc_handles[j]); - } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); return -1; } } - // Wait for RPC responses and then get response and add it to out_size which is the read size - // All potential outputs are served to free resources regardless of errors, although an errorcode is set. 
- ssize_t out_size = 0; + // Wait for RPC responses and then get response and add it to out_size + // which is the read size. All potential outputs are served to free + // resources regardless of errors, although an errorcode is set. bool error = false; - for (unsigned int i = 0; i < target_n; i++) { - // XXX We might need a timeout here to not wait forever for an output that never comes? - ret = margo_wait(rpc_waiters[i]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to wait for margo_request handle for path {} recipient {}", __func__, path, - targets[i]); - error = true; - errno = EBUSY; - } - // decode response - rpc_data_out_t out{}; - ret = margo_get_output(rpc_handles[i], &out); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to get rpc output for path {} recipient {}", __func__, path, targets[i]); + ssize_t out_size = 0; + std::size_t idx = 0; + + for(const auto& h : handles) { + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + CTX->log()->error("{}() Daemon reported error: {}", + __func__, out.err()); + error = true; + errno = out.err(); + } + + out_size += static_cast(out.io_size()); + + } catch(const std::exception& ex) { + CTX->log()->error("{}() Failed to get rpc output for path {} " + "recipient {}", __func__, path, targets[idx]); error = true; errno = EIO; } - if (out.err != 0) { - CTX->log()->error("{}() Daemon reported error: {}", __func__, out.err); - error = true; - errno = out.err; - } - out_size += static_cast(out.io_size); - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); + + ++idx; } - // free bulk handles for buffer - margo_bulk_free(rpc_bulk_handle); - return (error) ? -1 : out_size; + + return error ? 
-1 : out_size; } int trunc_data(const std::string& path, size_t current_size, size_t new_size) { -- GitLab From 094fb3b888eafe3bcb75be0859d900a960039407 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 12:21:07 +0200 Subject: [PATCH 20/71] Fix target lookup error in read() and write() --- src/client/rpc/ld_rpc_data_ws.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index c7d6a8a62..74767cfe8 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -121,8 +121,7 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, total_chunk_size -= chnk_rpad(offset + write_size, CHUNKSIZE); } - auto endp = CTX->hosts2().at( - CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts2().at(target); try { @@ -285,8 +284,7 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r total_chunk_size -= chnk_rpad(offset + read_size, CHUNKSIZE); } - auto endp = CTX->hosts2().at( - CTX->distributor()->locate_file_metadata(path)); + auto endp = CTX->hosts2().at(target); try { -- GitLab From ced527c41eaa1e3f14ed95765425ab038b19ebeb Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 13:05:57 +0200 Subject: [PATCH 21/71] trunc_data RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 114 ++++++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_data_ws.cpp | 101 ++++++++++++-------------- 4 files changed, 161 insertions(+), 56 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 175a37e15..8004fd3e1 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1680,6 +1680,120 @@ struct read_data { }; }; +//============================================================================== +// definitions for trunc_data 
+struct trunc_data { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = trunc_data; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_trunc_in_t; + using mercury_output_type = rpc_err_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 1850933248; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::trunc_data; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_trunc_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_err_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + uint64_t length) : + m_path(path), + m_length(length) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + uint64_t + length() const { + return m_length; + } + + explicit + input(const rpc_trunc_in_t& other) : + m_path(other.path), + m_length(other.length) { } + + explicit + operator rpc_trunc_in_t() { + return { + m_path.c_str(), + m_length, + }; + } + + private: + std::string m_path; + uint64_t m_length; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() 
: + m_err() {} + + output(int32_t err) : + m_err(err) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_err_out_t& out) { + m_err = out.err; + } + + int32_t + err() const { + return m_err; + } + + private: + int32_t m_err; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index d7b6e94f0..cf5a8aef9 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -205,6 +205,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_write_data_id = gkfs::rpc::write_data::public_id; rpc_read_data_id = gkfs::rpc::read_data::public_id; + rpc_trunc_data_id = gkfs::rpc::trunc_data::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 80aaab5ce..d90419adb 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -36,6 +36,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index 74767cfe8..fa7e5376b 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -365,84 +365,73 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r } int trunc_data(const std::string& path, size_t current_size, size_t new_size) { - assert(current_size > new_size); - hg_return_t ret; - rpc_trunc_in_t in; - in.path = path.c_str(); - in.length = new_size; + assert(current_size > new_size); bool error = false; - // Find out which data server needs to delete chunks in order to contact only them + // Find out which data servers need to delete data chunks in order to + // contact only them const unsigned int chunk_start = chnk_id_for_offset(new_size, CHUNKSIZE); - const 
unsigned int chunk_end = chnk_id_for_offset(current_size - new_size - 1, CHUNKSIZE); + const unsigned int chunk_end = + chnk_id_for_offset(current_size - new_size - 1, CHUNKSIZE); + std::unordered_set hosts; for(unsigned int chunk_id = chunk_start; chunk_id <= chunk_end; ++chunk_id) { hosts.insert(CTX->distributor()->locate_data(path, chunk_id)); } - std::vector rpc_handles(hosts.size()); - std::vector rpc_waiters(hosts.size()); - unsigned int req_num = 0; + std::vector> handles; + for (const auto& host: hosts) { - ret = margo_create_wrap_helper(rpc_trunc_data_id, host, rpc_handles[req_num]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to create Mercury handle for host: ", __func__, host); - break; - } - // send async rpc - ret = margo_iforward(rpc_handles[req_num], &in, &rpc_waiters[req_num]); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() Failed to send request to host: {}", __func__, host); - break; - } - ++req_num; - } + auto endp = CTX->hosts2().at(host); + + try { + CTX->log()->debug("{}() Sending RPC ...", __func__); + + gkfs::rpc::trunc_data::input in(path, new_size); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); - if(req_num < hosts.size()) { - // An error occurred. Cleanup and return - CTX->log()->error("{}() Error -> sent only some requests {}/{}. 
Cancelling request...", __func__, req_num, hosts.size()); - for(unsigned int i = 0; i < req_num; ++i) { - margo_destroy(rpc_handles[i]); + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + CTX->log()->error("{}() Failed to send request to host: {}", + __func__, host); + errno = EIO; + return -1; } - errno = EIO; - return -1; + } - assert(req_num == hosts.size()); // Wait for RPC responses and then get response - rpc_err_out_t out{}; - for (unsigned int i = 0; i < hosts.size(); ++i) { - ret = margo_wait(rpc_waiters[i]); - if (ret == HG_SUCCESS) { - ret = margo_get_output(rpc_handles[i], &out); - if (ret == HG_SUCCESS) { - if(out.err){ - CTX->log()->error("{}() received error response: {}", __func__, out.err); - error = true; - } - } else { - // Get output failed - CTX->log()->error("{}() while getting rpc output", __func__); + for(const auto& h : handles) { + + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + auto out = h.get().at(0); + + if(out.err() != 0) { + CTX->log()->error("{}() received error response: {}", + __func__, out.err()); error = true; + errno = EIO; } - } else { - // Wait failed - CTX->log()->error("{}() Failed while waiting for response", __func__); + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); error = true; + errno = EIO; } - - /* clean up resources consumed by this rpc */ - margo_free_output(rpc_handles[i], &out); - margo_destroy(rpc_handles[i]); } - if(error) { - errno = EIO; - return -1; - } - return 0; + return error ? 
-1 : 0; } ChunkStat chunk_stat() { -- GitLab From 378887d88148c789a0e68865e2a6e3cfab46023f Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 16:38:22 +0200 Subject: [PATCH 22/71] get_dirents RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 123 ++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_metadentry.cpp | 150 ++++++++++++++++----------- 4 files changed, 212 insertions(+), 63 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 8004fd3e1..40c1aa8e5 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1794,6 +1794,129 @@ struct trunc_data { }; }; +//============================================================================== +// definitions for get_dirents +struct get_dirents { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = get_dirents; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_get_dirents_in_t; + using mercury_output_type = rpc_get_dirents_out_t; + + // RPC public identifier + // (N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 4121034752; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::get_dirents; + + // requires response? 
+ constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_get_dirents_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_get_dirents_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(const std::string& path, + const hermes::exposed_memory& buffers) : + m_path(path), + m_buffers(buffers) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + std::string + path() const { + return m_path; + } + + hermes::exposed_memory + buffers() const { + return m_buffers; + } + + explicit + input(const rpc_get_dirents_in_t& other) : + m_path(other.path), + m_buffers(other.bulk_handle) { } + + explicit + operator rpc_get_dirents_in_t() { + return { + m_path.c_str(), + hg_bulk_t(m_buffers) + }; + } + + private: + std::string m_path; + hermes::exposed_memory m_buffers; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_err(), + m_dirents_size() {} + + output(int32_t err, size_t dirents_size) : + m_err(err), + m_dirents_size(dirents_size) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_get_dirents_out_t& out) { + m_err = out.err; + m_dirents_size = out.dirents_size; + } + + int32_t + err() const { + return m_err; + } + + int64_t + dirents_size() const { + return m_dirents_size; + } + + private: + int32_t m_err; + size_t m_dirents_size; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp 
b/src/client/preload.cpp index cf5a8aef9..6031edec3 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -206,6 +206,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_write_data_id = gkfs::rpc::write_data::public_id; rpc_read_data_id = gkfs::rpc::read_data::public_id; rpc_trunc_data_id = gkfs::rpc::trunc_data::public_id; + rpc_get_dirents_id = gkfs::rpc::get_dirents::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index d90419adb..46ca1fcb7 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -37,6 +37,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index b441dcf25..d336ab379 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -318,14 +318,11 @@ int get_metadentry_size(const std::string& path, off64_t& ret_size) { * Sends an RPC request to a specific node to push all chunks that belong to him */ void get_dirents(OpenDir& open_dir){ + CTX->log()->trace("{}() called", __func__); auto const root_dir = open_dir.path(); - auto const targets = CTX->distributor()->locate_directory_metadata(root_dir); - auto const host_size = targets.size(); - std::vector rpc_handles(host_size); - std::vector rpc_waiters(host_size); - std::vector rpc_in(host_size); - std::vector recv_buffers(host_size); + auto const targets = + CTX->distributor()->locate_directory_metadata(root_dir); /* preallocate receiving buffer. The actual size is not known yet. * @@ -333,83 +330,110 @@ void get_dirents(OpenDir& open_dir){ * It turns out that this operation is increadibly slow for such a big * buffer. Moreover we don't need a zeroed buffer here. 
*/ - auto recv_buff = std::unique_ptr(new char[RPC_DIRENTS_BUFF_SIZE]); - const unsigned long int per_host_buff_size = RPC_DIRENTS_BUFF_SIZE / host_size; - - hg_return_t hg_ret; + auto large_buffer = + std::unique_ptr(new char[RPC_DIRENTS_BUFF_SIZE]); - for(const auto& target_host: targets){ + //XXX there is a rounding error here depending on the number of targets... + const std::size_t per_host_buff_size = + RPC_DIRENTS_BUFF_SIZE / targets.size(); - CTX->log()->trace("{}() target_host: {}", __func__, target_host); - //Setup rpc input parameters for each host - rpc_in[target_host].path = root_dir.c_str(); - recv_buffers[target_host] = recv_buff.get() + (target_host * per_host_buff_size); + // expose local buffers for RMA from servers + std::vector exposed_buffers; + exposed_buffers.reserve(targets.size()); - hg_ret = margo_bulk_create( - ld_margo_rpc_id, 1, - reinterpret_cast(&recv_buffers[target_host]), - &per_host_buff_size, - HG_BULK_WRITE_ONLY, &(rpc_in[target_host].bulk_handle)); - if(hg_ret != HG_SUCCESS){ - throw std::runtime_error("Failed to create margo bulk handle"); + for(std::size_t i = 0; i < targets.size(); ++i) { + try { + exposed_buffers.emplace_back( + ld_network_service->expose( + std::vector{ + hermes::mutable_buffer{ + large_buffer.get() + (i * per_host_buff_size), + per_host_buff_size + } + }, + hermes::access_mode::write_only)); + } catch (const std::exception& ex) { + throw std::runtime_error("Failed to expose buffers for RMA"); } + } - hg_ret = margo_create_wrap_helper(rpc_get_dirents_id, target_host, rpc_handles[target_host]); - if (hg_ret != HG_SUCCESS) { - std::runtime_error("Failed to create margo handle"); - } - // Send RPC - CTX->log()->trace("{}() Sending RPC to host: {}", __func__, target_host); - hg_ret = margo_iforward(rpc_handles[target_host], - &rpc_in[target_host], - &rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking get_dirents on {} to recipient {}", __func__, 
root_dir, target_host); - for (uint64_t i = 0; i <= target_host; i++) { - margo_bulk_free(rpc_in[i].bulk_handle); - margo_destroy(rpc_handles[i]); - } - throw std::runtime_error("Failed to forward non-blocking rpc request"); + // send RPCs + std::vector> handles; + + for(std::size_t i = 0; i < targets.size(); ++i) { + + CTX->log()->trace("{}() target_host: {}", __func__, targets[i]); + + // Setup rpc input parameters for each host + auto endp = CTX->hosts2().at(targets[i]); + + gkfs::rpc::get_dirents::input in(root_dir, exposed_buffers[i]); + + try { + + CTX->log()->trace("{}() Sending RPC to host: {}", + __func__, targets[i]); + handles.emplace_back( + ld_network_service->post(endp, in)); + } catch(const std::exception& ex) { + CTX->log()->error("{}() Unable to send non-blocking get_dirents " + "on {} to recipient {}", + __func__, root_dir, targets[i]); + throw std::runtime_error("Failed to post non-blocking RPC request"); } } - for(unsigned int target_host = 0; target_host < host_size; target_host++){ - hg_ret = margo_wait(rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed while waiting for rpc completion. [root dir: {}, target host: {}]", root_dir, target_host)); - } - rpc_get_dirents_out_t out{}; - hg_ret = margo_get_output(rpc_handles[target_host], &out); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed to get rpc output.. [path: {}, target host: {}]", root_dir, target_host)); - } + // wait for RPC responses + for(std::size_t i = 0; i < handles.size(); ++i) { + + gkfs::rpc::get_dirents::output out; + + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + out = handles[i].get().at(0); - if (out.err) { - CTX->log()->error("{}() Sending RPC to host: {}", __func__, target_host); - throw std::runtime_error(fmt::format("Failed to retrieve dir entries from host '{}'. 
" - "Error '{}', path '{}'", target_host, strerror(out.err), root_dir)); + if(out.err() != 0) { + throw std::runtime_error( + fmt::format("Failed to retrieve dir entries from " + "host '{}'. Error '{}', path '{}'", + targets[i], strerror(out.err()), root_dir)); + } + } catch(const std::exception& ex) { + throw std::runtime_error( + fmt::format("Failed to get rpc output.. [path: {}, " + "target host: {}]", root_dir, targets[i])); } - bool* bool_ptr = reinterpret_cast(recv_buffers[target_host]); - char* names_ptr = recv_buffers[target_host] + (out.dirents_size * sizeof(bool)); - for(unsigned int i = 0; i < out.dirents_size; i++){ + // each server wrote information to its pre-defined region in + // large_buffer, recover it by computing the base_address for each + // particular server and adding the appropriate offsets + assert(exposed_buffers[i].count() == 1); + void* base_ptr = exposed_buffers[i].begin()->data(); - FileType ftype = (*bool_ptr)? FileType::directory : FileType::regular; + bool* bool_ptr = reinterpret_cast(base_ptr); + char* names_ptr = reinterpret_cast(base_ptr) + + (out.dirents_size() * sizeof(bool)); + + for(std::size_t j = 0; j < out.dirents_size(); j++) { + + FileType ftype = (*bool_ptr) ? 
+ FileType::directory : + FileType::regular; bool_ptr++; - //Check that we are not outside the recv_buff for this specific host - assert((names_ptr - recv_buffers[target_host]) > 0); - assert(static_cast(names_ptr - recv_buffers[target_host]) < per_host_buff_size); + // Check that we are not outside the recv_buff for this specific host + assert((names_ptr - reinterpret_cast(base_ptr)) > 0); + assert( + static_cast( + names_ptr - reinterpret_cast(base_ptr)) < + per_host_buff_size); auto name = std::string(names_ptr); names_ptr += name.size() + 1; open_dir.add(name, ftype); } - - margo_free_output(rpc_handles[target_host], &out); - margo_bulk_free(rpc_in[target_host].bulk_handle); - margo_destroy(rpc_handles[target_host]); } } -- GitLab From b223f6ebc52c44fb9a91550ef12b29bd77950970 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 17:28:20 +0200 Subject: [PATCH 23/71] chunk_stat RPC now uses Hermes instead of Margo --- include/client/rpc/hg_rpcs.hpp | 120 ++++++++++++++++++++++++++++++ src/client/preload.cpp | 1 + src/client/rpc/hg_rpcs.cpp | 1 + src/client/rpc/ld_rpc_data_ws.cpp | 75 ++++++++++--------- 4 files changed, 160 insertions(+), 37 deletions(-) diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 40c1aa8e5..5e1ee89be 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1917,6 +1917,126 @@ struct get_dirents { }; }; +//============================================================================== +// definitions for chunk_stat +struct chunk_stat { + + // forward declarations of public input/output types for this RPC + class input; + class output; + + // traits used so that the engine knows what to do with the RPC + using self_type = chunk_stat; + using handle_type = hermes::rpc_handle; + using input_type = input; + using output_type = output; + using mercury_input_type = rpc_chunk_stat_in_t; + using mercury_output_type = rpc_chunk_stat_out_t; + + // RPC public identifier + // 
(N.B: we reuse the same IDs assigned by Margo so that the daemon + // understands Hermes RPCs) + constexpr static const uint64_t public_id = 532742144; + + // RPC internal Mercury identifier + constexpr static const hg_id_t mercury_id = public_id; + + // RPC name + constexpr static const auto name = hg_tag::chunk_stat; + + // requires response? + constexpr static const auto requires_response = true; + + // Mercury callback to serialize input arguments + constexpr static const auto mercury_in_proc_cb = + HG_GEN_PROC_NAME(rpc_chunk_stat_in_t); + + // Mercury callback to serialize output arguments + constexpr static const auto mercury_out_proc_cb = + HG_GEN_PROC_NAME(rpc_chunk_stat_out_t); + + class input { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + input(int32_t dummy) : + m_dummy(dummy) { } + + input(input&& rhs) = default; + input(const input& other) = default; + input& operator=(input&& rhs) = default; + input& operator=(const input& other) = default; + + int32_t + dummy() const { + return m_dummy; + } + + explicit + input(const rpc_chunk_stat_in_t& other) : + m_dummy(other.dummy) { } + + explicit + operator rpc_chunk_stat_in_t() { + return { m_dummy }; + } + + private: + int32_t m_dummy; + }; + + class output { + + template + friend hg_return_t hermes::detail::post_to_mercury(ExecutionContext*); + + public: + output() : + m_chunk_size(), + m_chunk_total(), + m_chunk_free() {} + + output(uint64_t chunk_size, uint64_t chunk_total, uint64_t chunk_free) : + m_chunk_size(chunk_size), + m_chunk_total(chunk_total), + m_chunk_free(chunk_free) {} + + output(output&& rhs) = default; + output(const output& other) = default; + output& operator=(output&& rhs) = default; + output& operator=(const output& other) = default; + + explicit + output(const rpc_chunk_stat_out_t& out) { + m_chunk_size = out.chunk_size; + m_chunk_total = out.chunk_total; + m_chunk_free = out.chunk_free; + } + + uint64_t + chunk_size() const { + 
return m_chunk_size; + } + + uint64_t + chunk_total() const { + return m_chunk_total; + } + + uint64_t + chunk_free() const { + return m_chunk_free; + } + + private: + uint64_t m_chunk_size; + uint64_t m_chunk_total; + uint64_t m_chunk_free; + }; +}; + } // namespace rpc } // namespace gkfs diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 6031edec3..1c4dcfb1d 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -207,6 +207,7 @@ bool init_hermes_client(const std::string& transport_prefix) { rpc_read_data_id = gkfs::rpc::read_data::public_id; rpc_trunc_data_id = gkfs::rpc::trunc_data::public_id; rpc_get_dirents_id = gkfs::rpc::get_dirents::public_id; + rpc_chunk_stat_id = gkfs::rpc::chunk_stat::public_id; return true; } diff --git a/src/client/rpc/hg_rpcs.cpp b/src/client/rpc/hg_rpcs.cpp index 46ca1fcb7..6e0264671 100644 --- a/src/client/rpc/hg_rpcs.cpp +++ b/src/client/rpc/hg_rpcs.cpp @@ -38,6 +38,7 @@ register_user_request_types() { (void) registered_requests().add(); (void) registered_requests().add(); (void) registered_requests().add(); + (void) registered_requests().add(); } diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index fa7e5376b..211a06ddd 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -435,56 +435,57 @@ int trunc_data(const std::string& path, size_t current_size, size_t new_size) { } ChunkStat chunk_stat() { + CTX->log()->trace("{}()", __func__); - rpc_chunk_stat_in_t in; - auto const host_size = CTX->hosts().size(); - std::vector rpc_handles(host_size); - std::vector rpc_waiters(host_size); + std::vector> handles; - hg_return_t hg_ret; + for (const auto& endp : CTX->hosts2()) { + try { + CTX->log()->trace("{}() Sending RPC to host: {}", + __func__, endp.to_string()); - for (unsigned int target_host = 0; target_host < host_size; ++target_host) { - //Setup rpc input parameters for each host - hg_ret = margo_create_wrap_helper(rpc_chunk_stat_id, 
target_host, - rpc_handles[target_host]); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error("Failed to create margo handle"); - } - // Send RPC - CTX->log()->trace("{}() Sending RPC to host: {}", __func__, target_host); - hg_ret = margo_iforward(rpc_handles[target_host], - &in, - &rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - CTX->log()->error("{}() Unable to send non-blocking chunk_stat to recipient {}", __func__, target_host); - for (unsigned int i = 0; i <= target_host; i++) { - margo_destroy(rpc_handles[i]); - } + gkfs::rpc::chunk_stat::input in(0); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + CTX->log()->error("{}() Failed to send request to host: {}", + __func__, endp.to_string()); throw std::runtime_error("Failed to forward non-blocking rpc request"); } } + unsigned long chunk_size = CHUNKSIZE; unsigned long chunk_total = 0; unsigned long chunk_free = 0; - for (unsigned int target_host = 0; target_host < host_size; ++target_host) { - hg_ret = margo_wait(rpc_waiters[target_host]); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed while waiting for rpc completion. 
target host: {}", target_host)); - } - rpc_chunk_stat_out_t out{}; - hg_ret = margo_get_output(rpc_handles[target_host], &out); - if (hg_ret != HG_SUCCESS) { - throw std::runtime_error(fmt::format("Failed to get rpc output for target host: {}", target_host)); - } + // wait for RPC responses + for(std::size_t i = 0; i < handles.size(); ++i) { - assert(out.chunk_size == chunk_size); - chunk_total += out.chunk_total; - chunk_free += out.chunk_free; + gkfs::rpc::chunk_stat::output out; - margo_free_output(rpc_handles[target_host], &out); - margo_destroy(rpc_handles[target_host]); + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? + out = handles[i].get().at(0); + + assert(out.chunk_size() == chunk_size); + chunk_total += out.chunk_total(); + chunk_free += out.chunk_free(); + + } catch(const std::exception& ex) { + throw std::runtime_error( + fmt::format("Failed to get rpc output for target host: {}]", i)); + } } return {chunk_size, chunk_total, chunk_free}; -- GitLab From 145f53e65094c880f09308e5c93bfe0d60eb2f33 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 17:56:37 +0200 Subject: [PATCH 24/71] Remove references to Margo in client code --- include/client/preload_util.hpp | 16 --- src/client/preload.cpp | 139 +++------------------------ src/client/preload_util.cpp | 84 ---------------- src/client/rpc/ld_rpc_metadentry.cpp | 5 - 4 files changed, 14 insertions(+), 230 deletions(-) diff --git a/include/client/preload_util.hpp b/include/client/preload_util.hpp index 5bfbae74b..4879aec76 100644 --- a/include/client/preload_util.hpp +++ b/include/client/preload_util.hpp @@ -22,10 +22,6 @@ #include #include -extern "C" { -#include -} - struct MetadentryUpdateFlags { bool atime = false; bool mtime = false; @@ -39,9 +35,6 @@ struct MetadentryUpdateFlags { bool path = false; }; -// Margo instances -extern margo_instance_id ld_margo_rpc_id; - // Hermes instance namespace hermes { class async_engine; } 
extern std::unique_ptr ld_network_service; @@ -77,13 +70,4 @@ hg_addr_t get_local_addr(); void load_hosts(); bool lookup_all_hosts(); -void cleanup_addresses(); - -hg_return margo_create_wrap_helper(const hg_id_t rpc_id, uint64_t recipient, - hg_handle_t& handle); - -hg_return margo_create_wrap(const hg_id_t rpc_id, const std::string&, - hg_handle_t& handle); - - #endif //IFS_PRELOAD_UTIL_HPP diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 1c4dcfb1d..e5b839743 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -30,8 +30,8 @@ using namespace std; -// -// thread to initialize the whole margo shazaam only once per process + +// make sure that things are only initialized once static pthread_once_t init_env_thread = PTHREAD_ONCE_INIT; // RPC IDs @@ -49,8 +49,6 @@ hg_id_t rpc_read_data_id; hg_id_t rpc_trunc_data_id; hg_id_t rpc_get_dirents_id; hg_id_t rpc_chunk_stat_id; -// Margo instances -margo_instance_id ld_margo_rpc_id; std::unique_ptr ld_network_service; @@ -61,91 +59,13 @@ static inline void exit_error_msg(int errcode, const string& msg) { } /** - * Registers a margo instance with all used RPC - * Note that the r(pc tags are redundant for rpc - * @param mid - * @param mode + * Initializes the Hermes client for a given transport prefix + * @param transport_prefix + * @return true if succesfully initialized; false otherwise */ -void register_client_rpcs(margo_instance_id mid) { - - rpc_config_id = MARGO_REGISTER(mid, - hg_tag::fs_config, - void, - rpc_config_out_t, - NULL); - - rpc_mk_node_id = MARGO_REGISTER(mid, hg_tag::create, rpc_mk_node_in_t, rpc_err_out_t, NULL); - rpc_stat_id = MARGO_REGISTER(mid, hg_tag::stat, rpc_path_only_in_t, rpc_stat_out_t, NULL); - rpc_rm_node_id = MARGO_REGISTER(mid, hg_tag::remove, rpc_rm_node_in_t, - rpc_err_out_t, NULL); - - rpc_decr_size_id = MARGO_REGISTER(mid, - hg_tag::decr_size, - rpc_trunc_in_t, - rpc_err_out_t, - NULL); - - rpc_update_metadentry_id = MARGO_REGISTER(mid, 
hg_tag::update_metadentry, rpc_update_metadentry_in_t, - rpc_err_out_t, NULL); - rpc_get_metadentry_size_id = MARGO_REGISTER(mid, hg_tag::get_metadentry_size, rpc_path_only_in_t, - rpc_get_metadentry_size_out_t, NULL); - rpc_update_metadentry_size_id = MARGO_REGISTER(mid, hg_tag::update_metadentry_size, - rpc_update_metadentry_size_in_t, - rpc_update_metadentry_size_out_t, - NULL); - -#ifdef HAS_SYMLINKS - rpc_mk_symlink_id = MARGO_REGISTER(mid, - hg_tag::mk_symlink, - rpc_mk_symlink_in_t, - rpc_err_out_t, - NULL); -#endif - - rpc_write_data_id = MARGO_REGISTER(mid, hg_tag::write_data, rpc_write_data_in_t, rpc_data_out_t, - NULL); - rpc_read_data_id = MARGO_REGISTER(mid, hg_tag::read_data, rpc_read_data_in_t, rpc_data_out_t, - NULL); - - rpc_trunc_data_id = MARGO_REGISTER(mid, - hg_tag::trunc_data, - rpc_trunc_in_t, - rpc_err_out_t, - NULL); - - rpc_get_dirents_id = MARGO_REGISTER(mid, hg_tag::get_dirents, rpc_get_dirents_in_t, rpc_get_dirents_out_t, - NULL); - - rpc_chunk_stat_id = MARGO_REGISTER(mid, - hg_tag::chunk_stat, - rpc_chunk_stat_in_t, - rpc_chunk_stat_out_t, - NULL); - - fmt::print(stdout, "rpc_config_id: {}\n", rpc_config_id); - fmt::print(stdout, "rpc_mk_node_id: {}\n", rpc_mk_node_id); - fmt::print(stdout, "rpc_stat_id: {}\n", rpc_stat_id); - fmt::print(stdout, "rpc_rm_node_id: {}\n", rpc_rm_node_id); - fmt::print(stdout, "rpc_decr_size_id: {}\n", rpc_decr_size_id); - fmt::print(stdout, "rpc_update_metadentry_id: {}\n", rpc_update_metadentry_id); - fmt::print(stdout, "rpc_get_metadentry_size_id: {}\n", rpc_get_metadentry_size_id); - fmt::print(stdout, "rpc_update_metadentry_size_id: {}\n", rpc_update_metadentry_size_id); - fmt::print(stdout, "rpc_mk_symlink_id: {}\n", rpc_mk_symlink_id); - fmt::print(stdout, "rpc_write_data_id: {}\n", rpc_write_data_id); - fmt::print(stdout, "rpc_read_data_id: {}\n", rpc_read_data_id); - fmt::print(stdout, "rpc_trunc_data_id: {}\n", rpc_trunc_data_id); - fmt::print(stdout, "rpc_get_dirents_id: {}\n", 
rpc_get_dirents_id); - fmt::print(stdout, "rpc_chunk_stat_id: {}\n", rpc_chunk_stat_id); - -} +bool init_hermes_client(const std::string& transport_prefix) { -/** - * Initializes the Margo client for a given na_plugin - * @param mode - * @param na_plugin - * @return - */ -bool init_margo_client(const std::string& na_plugin) { +#if 0 // IMPORTANT: this struct needs to be zeroed before use struct hg_init_info hg_options = {}; #if USE_SHM @@ -155,29 +75,7 @@ bool init_margo_client(const std::string& na_plugin) { #endif hg_options.stats = HG_FALSE; hg_options.na_class = nullptr; - - ld_margo_rpc_id = margo_init_opt(na_plugin.c_str(), - MARGO_CLIENT_MODE, - &hg_options, - HG_FALSE, - 1); - if (ld_margo_rpc_id == MARGO_INSTANCE_NULL) { - CTX->log()->error("{}() margo_init_pool failed to initialize the Margo client", __func__); - return false; - } - register_client_rpcs(ld_margo_rpc_id); - return true; -} - - - - -/** - * Initializes the Hermes client for a given transport prefix - * @param transport_prefix - * @return true if succesfully initialized; false otherwise - */ -bool init_hermes_client(const std::string& transport_prefix) { +#endif try { ld_network_service = @@ -214,15 +112,11 @@ bool init_hermes_client(const std::string& transport_prefix) { /** - * This function is only called in the preload constructor and initializes Argobots and Margo clients + * This function is only called in the preload constructor and initializes + * the file system client */ void init_ld_environment_() { - //use rpc_addresses here to avoid "static initialization order problem" - if (!init_margo_client(RPC_PROTOCOL)) { - exit_error_msg(EXIT_FAILURE, "Unable to initializa Margo RPC client"); - } - // initialize Hermes interface to Mercury if (!init_hermes_client(RPC_PROTOCOL)) { exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client"); @@ -305,20 +199,15 @@ void init_preload() { * Called last when preload library is used with the LD_PRELOAD environment variable */ void 
destroy_preload() { + stop_interception(); CTX->disable_interception(); - if (ld_margo_rpc_id == nullptr) { - CTX->log()->debug("{}() No services in preload library used. Nothing to shut down.", __func__); - return; - } - cleanup_addresses(); + CTX->clear_hosts(); CTX->log()->debug("{}() About to finalize the Hermes RPC client", __func__); + ld_network_service.reset(); - CTX->log()->debug("{}() About to finalize the margo RPC client", __func__); - // XXX Sometimes this hangs on the cluster. Investigate. - margo_finalize(ld_margo_rpc_id); - CTX->log()->debug("{}() Shut down Margo RPC client successful", __func__); + CTX->log()->debug("{}() Shut down Hermes RPC client successful", __func__); CTX->log()->info("All services shut down. Client shutdown complete."); } diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index ac7b72b59..2f61ab611 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -133,28 +133,6 @@ hermes::endpoint lookup_endpoint(const std::string& uri, uri, error_msg)); } -hg_addr_t margo_addr_lookup_retry(const std::string& uri) { - CTX->log()->debug("{}() Lookink up address '{}'", __func__, uri); - // try to look up 3 times before erroring out - hg_return_t ret; - hg_addr_t remote_addr = HG_ADDR_NULL; - ::random_device rd; // obtain a random number from hardware - unsigned int attempts = 0; - do { - ret = margo_addr_lookup(ld_margo_rpc_id, uri.c_str(), &remote_addr); - if (ret == HG_SUCCESS) { - return remote_addr; - } - CTX->log()->warn("{}() Failed to lookup address '{}'. 
Attempts [{}/3]", __func__, uri, attempts + 1); - // Wait a random amount of time and try again - ::mt19937 g(rd()); // seed the random generator - ::uniform_int_distribution<> distr(50, 50 * (attempts + 2)); // define the range - ::this_thread::sleep_for(std::chrono::milliseconds(distr(g))); - } while (++attempts < 3); - throw runtime_error( - fmt::format("Failed to lookup address '{}', error: {}", uri, HG_Error_to_string(ret))); -} - void load_hosts() { string hosts_file; try { @@ -210,9 +188,6 @@ void load_hosts() { auto it = std::next(addrs2.begin(), id); addrs2.emplace(it, endp); - auto addr = margo_addr_lookup_retry(uri); // TODO(amiranda) remove - addrs.at(id) = addr; // TODO(amiranda) remove - if (!local_host_found && hostname == local_hostname) { CTX->log()->debug("{}() Found local host: {}", __func__, hostname); CTX->local_host_id(id); @@ -220,32 +195,6 @@ void load_hosts() { } } -#if 0 - fmt::print(stdout, " YYY hi!\n"); - - std::for_each( - addrs.begin(), - addrs.end(), - [](hg_addr_t addr) { - hg_class_t* hg_class = margo_get_class(ld_margo_rpc_id); - hg_size_t bsize = 0; - hg_return ret = HG_Addr_to_string(hg_class, NULL, &bsize, addr); - - const auto buffer = std::make_unique(bsize); - HG_Addr_to_string(hg_class, buffer.get(), &bsize, addr); - fmt::print(stdout, " XXX {}\n", std::string(buffer.get())); - } - ); - - std::for_each( - addrs2.begin(), - addrs2.end(), - [](const hermes::endpoint& endp) { - fmt::print(stdout, " ZZZ {}\n", endp.to_string()); - } - ); -#endif - if (!local_host_found) { CTX->log()->warn("{}() Failed to find local host." 
"Fallback: use host id '0' as local host", __func__); @@ -257,36 +206,3 @@ void load_hosts() { #endif CTX->hosts2(addrs2); } - -void cleanup_addresses() { -#if 1 //TODO(amiranda) remove - for (auto& addr: CTX->hosts()) { - margo_addr_free(ld_margo_rpc_id, addr); - } -#endif - - CTX->clear_hosts(); -} - - - -hg_return -margo_create_wrap_helper(const hg_id_t rpc_id, uint64_t recipient, hg_handle_t& handle) { - auto ret = margo_create(ld_margo_rpc_id, CTX->hosts().at(recipient), rpc_id, &handle); - if (ret != HG_SUCCESS) { - CTX->log()->error("{}() creating handle FAILED", __func__); - return HG_OTHER_ERROR; - } - return ret; -} - -/** - * Wraps certain margo functions to create a Mercury handle - * @param path - * @param handle - * @return - */ -hg_return margo_create_wrap(const hg_id_t rpc_id, const std::string& path, hg_handle_t& handle) { - auto recipient = CTX->distributor()->locate_file_metadata(path); - return margo_create_wrap_helper(rpc_id, recipient, handle); -} diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index d336ab379..0cec92e6a 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -25,11 +25,6 @@ namespace rpc_send { using namespace std; -static inline hg_return_t -margo_forward_timed_wrap(const hg_handle_t& handle, void* in_struct) { - return margo_forward_timed(handle, in_struct, RPC_TIMEOUT); -} - int mk_node(const std::string& path, const mode_t mode) { int err = EUNKNOWN; -- GitLab From 667277a0db7e67ecfaf1446789b6ce8193f75ff3 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 18:10:58 +0200 Subject: [PATCH 25/71] Code cleanup --- include/client/preload_context.hpp | 14 +++----------- src/client/preload_context.cpp | 19 ++++--------------- src/client/preload_util.cpp | 12 ++++-------- src/client/rpc/ld_rpc_data_ws.cpp | 12 ++++++------ src/client/rpc/ld_rpc_management.cpp | 2 +- src/client/rpc/ld_rpc_metadentry.cpp | 22 +++++++++++----------- 6 
files changed, 29 insertions(+), 52 deletions(-) diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 345a61d2b..1976b1a71 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -62,10 +62,7 @@ class PreloadContext { std::vector mountdir_components_; std::string mountdir_; -#if 1 // TODO(amiranda): remove - std::vector hosts_; -#endif - std::vector hosts2_; + std::vector hosts_; uint64_t local_host_id_; bool interception_enabled_; @@ -89,13 +86,8 @@ class PreloadContext { void cwd(const std::string& path); const std::string& cwd() const; -#if 1 // TODO(amiranda) remove - const std::vector& hosts() const; - void hosts(const std::vector& addrs); -#endif - - const std::vector& hosts2() const; - void hosts2(const std::vector& addrs); + const std::vector& hosts() const; + void hosts(const std::vector& addrs); void clear_hosts(); diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 7e466298e..454e2e8ef 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -58,27 +58,16 @@ const std::string& PreloadContext::cwd() const { return cwd_; } -#if 1 // TODO(amiranda) remove -const std::vector& PreloadContext::hosts() const { +const std::vector& PreloadContext::hosts() const { return hosts_; } -void PreloadContext::hosts(const std::vector& addrs) { - hosts_ = addrs; -} - -#endif - -const std::vector& PreloadContext::hosts2() const { - return hosts2_; -} - -void PreloadContext::hosts2(const std::vector& endpoints) { - hosts2_ = endpoints; +void PreloadContext::hosts(const std::vector& endpoints) { + hosts_ = endpoints; } void PreloadContext::clear_hosts() { - hosts2_.clear(); + hosts_.clear(); } uint64_t PreloadContext::local_host_id() const { diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 2f61ab611..164f765b0 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -160,10 +160,9 @@ void 
load_hosts() { auto local_hostname = get_my_hostname(true); bool local_host_found = false; - vector addrs(hosts.size()); // TODO(amiranda) remove - std::vector addrs2; - addrs2.reserve(hosts.size()); + std::vector addrs; + addrs.reserve(hosts.size()); vector host_ids(hosts.size()); // populate vector with [0, ..., host_size - 1] @@ -185,8 +184,8 @@ void load_hosts() { auto endp = ::lookup_endpoint(uri); - auto it = std::next(addrs2.begin(), id); - addrs2.emplace(it, endp); + auto it = std::next(addrs.begin(), id); + addrs.emplace(it, endp); if (!local_host_found && hostname == local_hostname) { CTX->log()->debug("{}() Found local host: {}", __func__, hostname); @@ -201,8 +200,5 @@ void load_hosts() { CTX->local_host_id(0); } -#if 1 // TODO(amiranda) remove CTX->hosts(addrs); -#endif - CTX->hosts2(addrs2); } diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index 211a06ddd..ebd4de556 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -121,7 +121,7 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, total_chunk_size -= chnk_rpad(offset + write_size, CHUNKSIZE); } - auto endp = CTX->hosts2().at(target); + auto endp = CTX->hosts().at(target); try { @@ -133,7 +133,7 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, // a potential offset chnk_lpad(offset, CHUNKSIZE), target, - CTX->hosts2().size(), + CTX->hosts().size(), // number of chunks handled by that destination target_chnks[target].size(), // chunk start id of this write @@ -284,7 +284,7 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r total_chunk_size -= chnk_rpad(offset + read_size, CHUNKSIZE); } - auto endp = CTX->hosts2().at(target); + auto endp = CTX->hosts().at(target); try { @@ -296,7 +296,7 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r // a potential offset chnk_lpad(offset, CHUNKSIZE), target, - 
CTX->hosts2().size(), + CTX->hosts().size(), // number of chunks handled by that destination target_chnks[target].size(), // chunk start id of this write @@ -384,7 +384,7 @@ int trunc_data(const std::string& path, size_t current_size, size_t new_size) { for (const auto& host: hosts) { - auto endp = CTX->hosts2().at(host); + auto endp = CTX->hosts().at(host); try { CTX->log()->debug("{}() Sending RPC ...", __func__); @@ -440,7 +440,7 @@ ChunkStat chunk_stat() { std::vector> handles; - for (const auto& endp : CTX->hosts2()) { + for (const auto& endp : CTX->hosts()) { try { CTX->log()->trace("{}() Sending RPC to host: {}", __func__, endp.to_string()); diff --git a/src/client/rpc/ld_rpc_management.cpp b/src/client/rpc/ld_rpc_management.cpp index 4937d9f52..544043318 100644 --- a/src/client/rpc/ld_rpc_management.cpp +++ b/src/client/rpc/ld_rpc_management.cpp @@ -30,7 +30,7 @@ namespace rpc_send { */ bool get_fs_config() { - auto endp = CTX->hosts2().at(CTX->local_host_id()); + auto endp = CTX->hosts().at(CTX->local_host_id()); gkfs::rpc::fs_config::output out; try { diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index 0cec92e6a..5fd8fce94 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -28,7 +28,7 @@ using namespace std; int mk_node(const std::string& path, const mode_t mode) { int err = EUNKNOWN; - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { @@ -54,7 +54,7 @@ int mk_node(const std::string& path, const mode_t mode) { int stat(const std::string& path, string& attr) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { @@ -87,7 +87,7 @@ int stat(const std::string& path, string& attr) { int decr_size(const std::string& path, size_t length) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( 
CTX->distributor()->locate_file_metadata(path)); try { @@ -125,7 +125,7 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { // else, send an rpc to all hosts and thus broadcast chunk_removal. if(remove_metadentry_only) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { @@ -161,8 +161,8 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { hermes::endpoint_set endps; - std::copy(CTX->hosts2().begin(), - CTX->hosts2().end(), + std::copy(CTX->hosts().begin(), + CTX->hosts().end(), std::back_inserter(endps)); try { @@ -192,7 +192,7 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { int update_metadentry(const string& path, const Metadata& md, const MetadentryUpdateFlags& md_flags) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { @@ -243,7 +243,7 @@ int update_metadentry(const string& path, const Metadata& md, const MetadentryUp int update_metadentry_size(const string& path, const size_t size, const off64_t offset, const bool append_flag, off64_t& ret_size) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { @@ -281,7 +281,7 @@ int update_metadentry_size(const string& path, const size_t size, const off64_t int get_metadentry_size(const std::string& path, off64_t& ret_size) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { @@ -360,7 +360,7 @@ void get_dirents(OpenDir& open_dir){ CTX->log()->trace("{}() target_host: {}", __func__, targets[i]); // Setup rpc input parameters for each host - auto endp = CTX->hosts2().at(targets[i]); + auto endp = CTX->hosts().at(targets[i]); gkfs::rpc::get_dirents::input in(root_dir, exposed_buffers[i]); @@ -436,7 +436,7 @@ void get_dirents(OpenDir& open_dir){ int mk_symlink(const std::string& 
path, const std::string& target_path) { - auto endp = CTX->hosts2().at( + auto endp = CTX->hosts().at( CTX->distributor()->locate_file_metadata(path)); try { -- GitLab From 75989c8f71f79cc3b6834fc759f987a3dde035cf Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 18:27:42 +0200 Subject: [PATCH 26/71] Remove warnings --- CMakeLists.txt | 2 +- include/client/rpc/hg_rpcs.hpp | 2 +- src/daemon/handler/h_metadentry.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eee0c3e3c..63852192b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ ENDIF (NOT CMAKE_BUILD_TYPE) message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") # Compiler flags for various cmake build types -set(WARNINGS_FLAGS "-Wall -Wextra --pedantic -Wno-unused-parameter") +set(WARNINGS_FLAGS "-Wall -Wextra --pedantic -Wno-unused-parameter -Wno-missing-field-initializers") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -O3") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0") set(CMAKE_CXX_FLAGS_MEMCHECK "${WARNINGS_FLAGS} -g -O0 -fsanitize=address -fno-omit-frame-pointer") diff --git a/include/client/rpc/hg_rpcs.hpp b/include/client/rpc/hg_rpcs.hpp index 5e1ee89be..240a82433 100644 --- a/include/client/rpc/hg_rpcs.hpp +++ b/include/client/rpc/hg_rpcs.hpp @@ -1906,7 +1906,7 @@ struct get_dirents { return m_err; } - int64_t + size_t dirents_size() const { return m_dirents_size; } diff --git a/src/daemon/handler/h_metadentry.cpp b/src/daemon/handler/h_metadentry.cpp index c30e2327f..aa1b556de 100644 --- a/src/daemon/handler/h_metadentry.cpp +++ b/src/daemon/handler/h_metadentry.cpp @@ -22,8 +22,8 @@ using namespace std; static hg_return_t rpc_srv_mk_node(hg_handle_t handle) { - rpc_mk_node_in_t in{}; - rpc_err_out_t out{}; + rpc_mk_node_in_t in{0}; + rpc_err_out_t out{0}; auto ret = margo_get_input(handle, &in); if (ret != HG_SUCCESS) -- GitLab From 
75ea3f08144de91fba4806cbbb3ece9a05c71866 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 20:33:04 +0200 Subject: [PATCH 27/71] Update Hermes submodule --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 8bcdee34c..5c72fe06d 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 8bcdee34c2702a46243cfc3e527d2b34acb31608 +Subproject commit 5c72fe06d390868bf1345a8767033c249997d2b7 -- GitLab From 81794345b4f6fb73488a5de2e445774c46342864 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 9 Sep 2019 20:33:40 +0200 Subject: [PATCH 28/71] Pass Mercury options to Hermes engine --- src/client/preload.cpp | 16 +++++----------- src/daemon/handler/h_metadentry.cpp | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/client/preload.cpp b/src/client/preload.cpp index e5b839743..0d689a335 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -65,22 +65,16 @@ static inline void exit_error_msg(int errcode, const string& msg) { */ bool init_hermes_client(const std::string& transport_prefix) { -#if 0 - // IMPORTANT: this struct needs to be zeroed before use - struct hg_init_info hg_options = {}; + try { + + hermes::engine_options opts; #if USE_SHM - hg_options.auto_sm = HG_TRUE; -#else - hg_options.auto_sm = HG_FALSE; -#endif - hg_options.stats = HG_FALSE; - hg_options.na_class = nullptr; + opts |= hermes::use_auto_sm; #endif - try { ld_network_service = std::make_unique( - hermes::get_transport_type(transport_prefix)); + hermes::get_transport_type(transport_prefix), opts); ld_network_service->run(); } catch (const std::exception& ex) { fmt::print(stderr, "Failed to initialize Hermes RPC client {}\n", diff --git a/src/daemon/handler/h_metadentry.cpp b/src/daemon/handler/h_metadentry.cpp index aa1b556de..52e48c43d 100644 --- a/src/daemon/handler/h_metadentry.cpp +++ b/src/daemon/handler/h_metadentry.cpp @@ -22,8 +22,8 @@ 
using namespace std; static hg_return_t rpc_srv_mk_node(hg_handle_t handle) { - rpc_mk_node_in_t in{0}; - rpc_err_out_t out{0}; + rpc_mk_node_in_t in; + rpc_err_out_t out; auto ret = margo_get_input(handle, &in); if (ret != HG_SUCCESS) -- GitLab From 176074d0ef28660dd57918de296f86c1969f0426 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 10 Sep 2019 11:33:54 +0200 Subject: [PATCH 29/71] Enable top-level submodules in CI --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 62f247711..4fbcc4943 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,6 +17,7 @@ variables: GKFS_LOG_LEVEL: "100" GKFS_DAEMON_LOG_PATH: "${CI_PROJECT_DIR}/logs/daemon.log" GKFS_PRELOAD_LOG_PATH: "${CI_PROJECT_DIR}/logs/preload.log" + GIT_SUBMODULE_STRATEGY: recursive image: gekkofs/gekkofs:build_env -- GitLab From 58055d57777efc08fb427e66f8e67ee9c09b804b Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sat, 14 Sep 2019 20:00:28 +0200 Subject: [PATCH 30/71] Add tracking of internal fds Fixes an issue where client applications such as ssh might attempt to close all open fds, therefore also closing the fds used by the internal components of the library such as Mercury. 
--- CMakeLists.txt | 19 + include/client/preload_context.hpp | 15 + src/client/hooks.cpp | 7 + src/client/intercept.cpp | 781 +++++++++++++++++++---------- src/client/preload.cpp | 53 ++ src/client/preload_context.cpp | 36 ++ 6 files changed, 643 insertions(+), 268 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 63852192b..31e8231c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,25 @@ if(SYMLINK_SUPPORT) endif() message(STATUS "Symlink support: ${SYMLINK_SUPPORT}") +option(USE_BITSET_FOR_INTERNAL_FDS "Use std::bitset to track internal fds" ON) +if(USE_BITSET_FOR_INTERNAL_FDS) + add_definitions(-DUSE_BITSET_FOR_INTERNAL_FDS) + execute_process(COMMAND getconf OPEN_MAX + OUTPUT_VARIABLE GETCONF_MAX_FDS + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + if(NOT GETCONF_MAX_FDS) + set(GETCONF_MAX_FDS=512) + endif() + add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS}) +endif() + +message(STATUS "Use std::bitset for internal fd tracking: ${USE_BITSET_FOR_INTERNAL_FDS}") + +if(USE_BITSET_FOR_INTERNAL_FDS) + message(STATUS "Max open files: ${GETCONF_MAX_FDS}") +endif() + configure_file(include/global/configure.hpp.in include/global/configure.hpp) # Imported target diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 1976b1a71..16ad7f610 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -22,6 +22,10 @@ #include #include +#ifdef USE_BITSET_FOR_INTERNAL_FDS +#include +#endif // USE_BITSET_FOR_INTERNAL_FDS + /* Forward declarations */ class OpenFileMap; class Distributor; @@ -67,6 +71,13 @@ class PreloadContext { bool interception_enabled_; +#ifdef USE_BITSET_FOR_INTERNAL_FDS + std::bitset internal_fds_; +#else + std::set internal_fds_; +#endif // USE_BITSET_FOR_INTERNAL_FDS + + public: static PreloadContext* getInstance() { static PreloadContext instance; @@ -109,6 +120,10 @@ class PreloadContext { void enable_interception(); void disable_interception(); bool 
interception_enabled() const; + + void register_internal_fd(int fd); + void unregister_internal_fd(int fd); + bool is_internal_fd(int fd) const; }; diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 79e7125d6..a426fdad4 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -61,6 +61,13 @@ int hook_close(int fd) { CTX->file_map()->remove(fd); return 0; } + + if(CTX->is_internal_fd(fd)) { + // the client application (for some reason) is trying to close an + // internal fd: ignore it + return 0; + } + return syscall_no_intercept(SYS_close, fd); } diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index af8a59ccb..3f17a41af 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -27,8 +27,47 @@ #define NOT_HOOKED 1 #define HOOKED 0 +#if 0 +static void +log_write(const char *fmt, ...) +{ + int log_fd = 2; + if (log_fd < 0) + return; + + char buf[0x1000]; + int len; + va_list ap; + + va_start(ap, fmt); + len = vsnprintf(buf, sizeof(buf) - 1, fmt, ap); + va_end(ap); + + + if (len < 1) + return; + + buf[len++] = '\n'; + + syscall_no_intercept(SYS_write, log_fd, buf, len); +} +#endif + +static __thread bool reentrance_guard_flag; -static inline int hook(long syscall_number, + +/* + * hook_internal -- interception hook for internal syscalls + * + * This hook is basically used to keep track of file descriptors created + * internally by the library itself. This is important because some + * applications (e.g. ssh) may attempt to close all open file descriptors + * which would leave the library internals in an inconsistent state. + * We forward syscalls to the kernel but we keep track of any syscalls that may + * create or destroy a file descriptor so that we can mark them as 'internal'. 
+ */ +static inline int +hook_internal(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *result) @@ -36,277 +75,460 @@ static inline int hook(long syscall_number, switch (syscall_number) { - case SYS_open: - *result = hook_openat(AT_FDCWD, - reinterpret_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_creat: - *result = hook_openat(AT_FDCWD, - reinterpret_cast(arg0), - O_WRONLY | O_CREAT | O_TRUNC, - static_cast(arg1)); - break; - - case SYS_openat: - *result = hook_openat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3)); - break; - - case SYS_close: - *result = hook_close(static_cast(arg0)); - break; - - case SYS_stat: - *result = hook_stat(reinterpret_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_lstat: - *result = hook_lstat(reinterpret_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_fstat: - *result = hook_fstat(static_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_newfstatat: - *result = hook_fstatat(static_cast(arg0), - reinterpret_cast(arg1), - reinterpret_cast(arg2), - static_cast(arg3)); - break; - - case SYS_read: - *result = hook_read(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_pread64: - *result = hook_pread(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3)); - break; - - case SYS_pwrite64: - *result = hook_pwrite(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3)); - break; - case SYS_write: - *result = hook_write(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_writev: - *result = hook_writev(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_pwritev: - *result = hook_pwritev(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2), - static_cast(arg3), - static_cast(arg4)); - break; - - case SYS_unlink: 
- *result = hook_unlinkat(AT_FDCWD, - reinterpret_cast(arg0), - 0); - break; + case SYS_open: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_creat: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + O_WRONLY | O_CREAT | O_TRUNC, + static_cast(arg1)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; - case SYS_unlinkat: - *result = hook_unlinkat(static_cast(arg0), + case SYS_openat: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + // epoll_create and epoll_create1 have the same prototype + case SYS_epoll_create: + case SYS_epoll_create1: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_dup: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_dup2: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_dup3: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), static_cast(arg2)); - break; - case SYS_rmdir: - *result = hook_unlinkat(AT_FDCWD, - reinterpret_cast(arg0), - AT_REMOVEDIR); - break; - - case SYS_symlink: - *result = hook_symlinkat(reinterpret_cast(arg0), - AT_FDCWD, - reinterpret_cast(arg1)); - break; - - case SYS_symlinkat: - *result = hook_symlinkat(reinterpret_cast(arg0), - static_cast(arg1), - reinterpret_cast(arg2)); - break; - - case SYS_access: - *result = hook_access(reinterpret_cast(arg0), - 
static_cast(arg1)); - break; - - case SYS_faccessat: - *result = hook_faccessat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_lseek: - *result = hook_lseek(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_truncate: - *result = hook_truncate(reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_ftruncate: - *result = hook_ftruncate(static_cast(arg0), - static_cast(arg1)); - break; - - case SYS_dup: - *result = hook_dup(static_cast(arg0)); - break; - - case SYS_dup2: - *result = hook_dup2(static_cast(arg0), - static_cast(arg1)); - break; - - case SYS_dup3: - *result = hook_dup3(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_getdents: - *result = hook_getdents(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_mkdirat: - *result = hook_mkdirat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_mkdir: - *result = hook_mkdirat(AT_FDCWD, - reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_chmod: - *result = hook_fchmodat(AT_FDCWD, - reinterpret_cast(arg0), - static_cast(arg1)); - break; + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_inotify_init: + *result = syscall_no_intercept(syscall_number); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_inotify_init1: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; - case SYS_fchmod: - *result = hook_fchmod(static_cast(arg0), - static_cast(arg1)); - break; + case SYS_perf_event_open: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + static_cast(arg1), + static_cast(arg2), + static_cast(arg3), + static_cast(arg4)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_signalfd: + 
*result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_signalfd4: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_timerfd_create: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1)); + + if(*result != -1) { + CTX->register_internal_fd(*result); + } + break; + + case SYS_close: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + CTX->unregister_internal_fd(*result); + break; + + default: + /* + * Ignore any other syscalls + * i.e.: pass them on to the kernel + * as would normally happen. + */ + + #ifndef NDEBUG + CTX->log()->trace("Syscall [{}, {}] Passthrough", + syscall_names[syscall_number], syscall_number); + #endif + return NOT_HOOKED; + } + + #ifndef NDEBUG + CTX->log()->trace("Syscall [{}, {}] Intercepted", + syscall_names[syscall_number], syscall_number); + #endif + +#if 0 + log_write("Internal syscall [%s, %d] = %d", syscall_names[syscall_number]);//, syscall_number, *result); +#endif - case SYS_fchmodat: - *result = hook_fchmodat(static_cast(arg0), - reinterpret_cast(arg1), + return HOOKED; + +} + +/* + * hook -- interception hook for application syscalls + * + * This hook is used to implement any application filesystem-related syscalls. 
+ */ +static inline +int hook(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *result) +{ + + switch (syscall_number) { + + case SYS_open: + *result = hook_openat(AT_FDCWD, + reinterpret_cast(arg0), + static_cast(arg1), static_cast(arg2)); - break; - - case SYS_chdir: - *result = hook_chdir(reinterpret_cast(arg0)); - break; - - case SYS_fchdir: - *result = hook_fchdir(static_cast(arg0)); - break; - - case SYS_getcwd: - *result = hook_getcwd(reinterpret_cast(arg0), - static_cast(arg1)); - break; - - case SYS_readlink: - *result = hook_readlinkat(AT_FDCWD, - reinterpret_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); - break; - - case SYS_readlinkat: - *result = hook_readlinkat(static_cast(arg0), - reinterpret_cast(arg1), - reinterpret_cast(arg2), - static_cast(arg3)); - break; - - case SYS_fcntl: - *result = hook_fcntl(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); - break; - - case SYS_rename: - *result = hook_renameat(AT_FDCWD, - reinterpret_cast(arg0), - AT_FDCWD, - reinterpret_cast(arg1), - 0); - break; + break; - case SYS_renameat: - *result = hook_renameat(static_cast(arg0), - reinterpret_cast(arg1), + case SYS_creat: + *result = hook_openat(AT_FDCWD, + reinterpret_cast(arg0), + O_WRONLY | O_CREAT | O_TRUNC, + static_cast(arg1)); + break; + + case SYS_openat: + *result = hook_openat(static_cast(arg0), + reinterpret_cast(arg1), static_cast(arg2), - reinterpret_cast(arg3), - 0); - break; + static_cast(arg3)); + break; + + case SYS_close: + *result = hook_close(static_cast(arg0)); + break; + + case SYS_stat: + *result = hook_stat(reinterpret_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_lstat: + *result = hook_lstat(reinterpret_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_fstat: + *result = hook_fstat(static_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_newfstatat: + *result = hook_fstatat(static_cast(arg0), + reinterpret_cast(arg1), + 
reinterpret_cast(arg2), + static_cast(arg3)); + break; + + case SYS_read: + *result = hook_read(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_pread64: + *result = hook_pread(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + break; + + case SYS_pwrite64: + *result = hook_pwrite(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3)); + break; + case SYS_write: + *result = hook_write(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_writev: + *result = hook_writev(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_pwritev: + *result = hook_pwritev(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + static_cast(arg3), + static_cast(arg4)); + break; + + case SYS_unlink: + *result = hook_unlinkat(AT_FDCWD, + reinterpret_cast(arg0), + 0); + break; + + case SYS_unlinkat: + *result = hook_unlinkat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_rmdir: + *result = hook_unlinkat(AT_FDCWD, + reinterpret_cast(arg0), + AT_REMOVEDIR); + break; + + case SYS_symlink: + *result = hook_symlinkat(reinterpret_cast(arg0), + AT_FDCWD, + reinterpret_cast(arg1)); + break; + + case SYS_symlinkat: + *result = hook_symlinkat(reinterpret_cast(arg0), + static_cast(arg1), + reinterpret_cast(arg2)); + break; + + case SYS_access: + *result = hook_access(reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_faccessat: + *result = hook_faccessat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_lseek: + *result = hook_lseek(static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; + + case SYS_truncate: + *result = hook_truncate(reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_ftruncate: + *result = hook_ftruncate(static_cast(arg0), + static_cast(arg1)); + break; + + case 
SYS_dup: + *result = hook_dup(static_cast(arg0)); + break; + + case SYS_dup2: + *result = hook_dup2(static_cast(arg0), + static_cast(arg1)); + break; + + case SYS_dup3: + *result = hook_dup3(static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; + + case SYS_getdents: + *result = hook_getdents(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; - case SYS_renameat2: - *result = hook_renameat(static_cast(arg0), + case SYS_mkdirat: + *result = hook_mkdirat(static_cast(arg0), reinterpret_cast(arg1), - static_cast(arg2), - reinterpret_cast(arg3), - static_cast(arg4)); - break; - - case SYS_fstatfs: - *result = hook_fstatfs(static_cast(arg0), - reinterpret_cast(arg1)); - break; - - case SYS_statfs: - *result = hook_statfs(reinterpret_cast(arg0), - reinterpret_cast(arg1)); - break; - - default: - /* - * Ignore any other syscalls - * i.e.: pass them on to the kernel - * as would normally happen. - */ - - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Passthrough", syscall_names[syscall_number], syscall_number); - #endif - return NOT_HOOKED; + static_cast(arg2)); + break; + + case SYS_mkdir: + *result = hook_mkdirat(AT_FDCWD, + reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_chmod: + *result = hook_fchmodat(AT_FDCWD, + reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_fchmod: + *result = hook_fchmod(static_cast(arg0), + static_cast(arg1)); + break; + + case SYS_fchmodat: + *result = hook_fchmodat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_chdir: + *result = hook_chdir(reinterpret_cast(arg0)); + break; + + case SYS_fchdir: + *result = hook_fchdir(static_cast(arg0)); + break; + + case SYS_getcwd: + *result = hook_getcwd(reinterpret_cast(arg0), + static_cast(arg1)); + break; + + case SYS_readlink: + *result = hook_readlinkat(AT_FDCWD, + reinterpret_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + + case SYS_readlinkat: + 
*result = hook_readlinkat(static_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2), + static_cast(arg3)); + break; + + case SYS_fcntl: + *result = hook_fcntl(static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + break; + + case SYS_rename: + *result = hook_renameat(AT_FDCWD, + reinterpret_cast(arg0), + AT_FDCWD, + reinterpret_cast(arg1), + 0); + break; + + case SYS_renameat: + *result = hook_renameat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + reinterpret_cast(arg3), + 0); + break; + + case SYS_renameat2: + *result = hook_renameat(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2), + reinterpret_cast(arg3), + static_cast(arg4)); + break; + + case SYS_fstatfs: + *result = hook_fstatfs(static_cast(arg0), + reinterpret_cast(arg1)); + break; + + case SYS_statfs: + *result = hook_statfs(reinterpret_cast(arg0), + reinterpret_cast(arg1)); + break; + + default: + /* + * Ignore any other syscalls + * i.e.: pass them on to the kernel + * as would normally happen. + */ + + #ifndef NDEBUG + CTX->log()->trace("Syscall [{}, {}] Passthrough", syscall_names[syscall_number], syscall_number); + #endif + return NOT_HOOKED; } #ifndef NDEBUG @@ -316,8 +538,20 @@ static inline int hook(long syscall_number, } -static __thread bool guard_flag; - +/* + * hook_guard_wrapper -- a wrapper which can notice reentrance. + * + * The reentrance_guard_flag flag allows the library to distinguish the hooking + * of its own syscalls. E.g. while handling an open() syscall, + * libgkfs_intercept might call fopen(), which in turn uses an open() + * syscall internally. This internally used open() syscall is once again + * forwarded to libgkfs_intercept, but using this flag we can notice this + * case of reentering itself. + * + * XXX This approach still contains a very significant bug, as libgkfs_intercept + * being called inside a signal handler might easily forward a mock fd to the + * kernel. 
+ */ int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, @@ -326,19 +560,30 @@ hook_guard_wrapper(long syscall_number, { assert(CTX->interception_enabled()); - if (guard_flag) { - return NOT_HOOKED; - } +#if 0 + log_write("syscall %s called from %s", + syscall_names[syscall_number], + reentrance_guard_flag ? "gkfs" : "client"); +#endif int is_hooked; - guard_flag = true; + if (reentrance_guard_flag) { + int oerrno = errno; + is_hooked = hook_internal(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); + errno = oerrno; + return is_hooked; + } + + reentrance_guard_flag = true; int oerrno = errno; is_hooked = hook(syscall_number, arg0, arg1, arg2, arg3, arg4, arg5, syscall_return_value); errno = oerrno; - guard_flag = false; + reentrance_guard_flag = false; return is_hooked; } diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 0d689a335..2f712b574 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -26,6 +26,9 @@ #include #include +#include +#include + #include @@ -104,6 +107,33 @@ bool init_hermes_client(const std::string& transport_prefix) { return true; } +static inline std::set +query_open_fds() { + + std::set fds; + const std::string path{"/proc/self/fd"}; + + std::unique_ptr dirp( + ::opendir(path.c_str()), + closedir); + + struct dirent entry; + struct dirent *result; + + while (::readdir_r(dirp.get(), &entry, &result) == 0 && result != NULL) { + const std::string name{entry.d_name}; + + if(name == "." || name == ".." 
|| + std::stoi(name) == dirfd(dirp.get())) { + continue; + } + + fds.insert(std::stoi(name)); + } + + return fds; +} + /** * This function is only called in the preload constructor and initializes @@ -111,6 +141,17 @@ bool init_hermes_client(const std::string& transport_prefix) { */ void init_ld_environment_() { + // Client applications such as ssh attempt to close all open file + // descriptors, which causes havoc with the interception library's internal + // state. To account for this, in the interception code we keep track of + // internal fds by distinguishing between internal syscalls (i.e. those + // coming from internal code) application syscalls. The problem is that + // at this point in initialization we have not enabled interception yet, + // but the initialization process itself needs to create file descriptors. + // To solve this problem, we find out which fds are created by the + // initialization process and manually protect them at this point + auto pre_init_fds = query_open_fds(); + // initialize Hermes interface to Mercury if (!init_hermes_client(RPC_PROTOCOL)) { exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client"); @@ -130,6 +171,17 @@ void init_ld_environment_() { exit_error_msg(EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC."); } + auto post_init_fds = query_open_fds(); + std::set internal_fds{3}; // fd 3 is created by the logging system + + std::set_difference(post_init_fds.begin(), post_init_fds.end(), + pre_init_fds.begin(), pre_init_fds.end(), + std::inserter(internal_fds, internal_fds.end())); + + for(const auto& fd : internal_fds) { + CTX->register_internal_fd(fd); + } + CTX->log()->info("{}() Environment initialization successful.", __func__); } @@ -178,6 +230,7 @@ void log_prog_name() { * Called initially ONCE when preload library is used with the LD_PRELOAD environment variable */ void init_preload() { + init_logging(); CTX->log()->debug("Initialized logging subsystem"); 
log_prog_name(); diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 454e2e8ef..d8a5e87bc 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -172,3 +172,39 @@ bool PreloadContext::interception_enabled() const { return interception_enabled_; } +void PreloadContext::register_internal_fd(int fd) { + +#ifdef USE_BITSET_FOR_INTERNAL_FDS + internal_fds_.set(fd); +#else + decltype(internal_fds_)::iterator it; + bool inserted; + + std::tie(it, inserted) = internal_fds_.insert(fd); + assert(inserted); +#endif // USE_BITSET_FOR_INTERNAL_FDS + +} + +void PreloadContext::unregister_internal_fd(int fd) { + +#ifdef USE_BITSET_FOR_INTERNAL_FDS + internal_fds_.reset(fd); +#else + std::size_t n = internal_fds_.erase(fd); + assert(n == 1); +#endif // USE_BITSET_FOR_INTERNAL_FDS + +} + +bool PreloadContext::is_internal_fd(int fd) const { + +#ifdef USE_BITSET_FOR_INTERNAL_FDS + return internal_fds_[fd]; +#else + return internal_fds_.count(fd) != 0; +#endif // USE_BITSET_FOR_INTERNAL_FDS + +} + + -- GitLab From 96665c42605b253d3593101b1b54277c47af2bb7 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sat, 14 Sep 2019 20:04:29 +0200 Subject: [PATCH 31/71] Remove Margo/Argobots from client dependencies --- src/client/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 4807923dc..e053728c0 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -49,10 +49,8 @@ target_link_libraries(gkfs_intercept # external Syscall_intercept::Syscall_intercept dl - ${ABT_LIBRARIES} mercury hermes - ${MARGO_LIBRARIES} Boost::boost # needed for tokenizer header Threads::Threads ) -- GitLab From 23cdcf1c195d6df1431483ea29124dde9f5867b9 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 18 Sep 2019 14:47:01 +0200 Subject: [PATCH 32/71] Update Hermes submodule --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/external/hermes b/external/hermes index 5c72fe06d..38b6bbfe7 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 5c72fe06d390868bf1345a8767033c249997d2b7 +Subproject commit 38b6bbfe77806b527f4e4b4157feea658f555be6 -- GitLab From 139fcc1fe89f2e2c56681b6e6e3db679558182fc Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 18 Sep 2019 14:48:26 +0200 Subject: [PATCH 33/71] Fix std::vector bug when determining hosts --- src/client/preload_util.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 164f765b0..40294cf11 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -162,7 +162,7 @@ void load_hosts() { bool local_host_found = false; std::vector addrs; - addrs.reserve(hosts.size()); + addrs.resize(hosts.size()); vector host_ids(hosts.size()); // populate vector with [0, ..., host_size - 1] @@ -179,13 +179,10 @@ void load_hosts() { // lookup addresses and put abstract server addresses into rpc_addressesre for (const auto& id: host_ids) { - const auto& hostname = hosts.at(id).first; - const auto& uri = hosts.at(id).second; + const auto& hostname = hosts.at(id).first; + const auto& uri = hosts.at(id).second; - auto endp = ::lookup_endpoint(uri); - - auto it = std::next(addrs.begin(), id); - addrs.emplace(it, endp); + addrs[id] = ::lookup_endpoint(uri); if (!local_host_found && hostname == local_hostname) { CTX->log()->debug("{}() Found local host: {}", __func__, hostname); -- GitLab From ebd1173e1d3c7646b755fefee468d6b8face9ce2 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 18 Sep 2019 16:32:42 +0200 Subject: [PATCH 34/71] Fix bug in rm_node due to async_engine::broadcast --- src/client/rpc/ld_rpc_metadentry.cpp | 55 +++++++++++++++++++--------- src/daemon/handler/h_metadentry.cpp | 2 +- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/client/rpc/ld_rpc_metadentry.cpp 
b/src/client/rpc/ld_rpc_metadentry.cpp index 5fd8fce94..bdac7bfa4 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -159,34 +159,55 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { std::vector> handles; - hermes::endpoint_set endps; + for (const auto& endp : CTX->hosts()) { + try { + CTX->log()->trace("{}() Sending RPC to host: {}", + __func__, endp.to_string()); - std::copy(CTX->hosts().begin(), - CTX->hosts().end(), - std::back_inserter(endps)); + gkfs::rpc::remove::input in(path); - try { + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that + // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + CTX->log()->error("{}() Failed to send request to host: {}", + __func__, endp.to_string()); + throw std::runtime_error("Failed to forward non-blocking rpc request"); + } + } - auto output_set = - ld_network_service->broadcast(endps, path).get(); + // wait for RPC responses + bool got_error = false; - // Wait for RPC responses and then get response - for (const auto& out : output_set) { - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + for(const auto& h : handles) { + + try { + // XXX We might need a timeout here to not wait forever for an + // output that never comes? 
+ auto out = h.get().at(0); if(out.err() != 0) { + CTX->log()->error("{}() received error response: {}", + __func__, out.err()); + got_error = true; errno = out.err(); - return -1; } + } catch(const std::exception& ex) { + CTX->log()->error("{}() while getting rpc output", __func__); + got_error = true; + errno = EBUSY; } + } - return 0; + return got_error ? -1 : 0; - } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); - errno = EBUSY; - return -1; - } } diff --git a/src/daemon/handler/h_metadentry.cpp b/src/daemon/handler/h_metadentry.cpp index 52e48c43d..fad762fbb 100644 --- a/src/daemon/handler/h_metadentry.cpp +++ b/src/daemon/handler/h_metadentry.cpp @@ -132,7 +132,7 @@ static hg_return_t rpc_srv_rm_node(hg_handle_t handle) { if (ret != HG_SUCCESS) ADAFS_DATA->spdlogger()->error("{}() Failed to retrieve input from handle", __func__); assert(ret == HG_SUCCESS); - ADAFS_DATA->spdlogger()->debug("Got remove node RPC with path {}", in.path); + ADAFS_DATA->spdlogger()->debug("Got remove node RPC with path '{}'", in.path); try { // Remove metadentry if exists on the node -- GitLab From b7b88817975df94c5e6e0112aff1158932729313 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Wed, 18 Sep 2019 16:43:53 +0200 Subject: [PATCH 35/71] Add missing header to hooks.hpp --- include/client/hooks.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/client/hooks.hpp b/include/client/hooks.hpp index bf5d621dc..98c0aae94 100644 --- a/include/client/hooks.hpp +++ b/include/client/hooks.hpp @@ -14,6 +14,7 @@ #ifndef IFS_HOOKS_HPP #define IFS_HOOKS_HPP +#include #include -- GitLab From 158d1db34efa7d04bb6c999140e1afe43478cf4d Mon Sep 17 00:00:00 2001 From: Ramon Nou Date: Thu, 3 Oct 2019 13:03:55 +0000 Subject: [PATCH 36/71] Resolve "Improve delete performance for small files" --- include/client/rpc/ld_rpc_metadentry.hpp | 2 +- src/client/adafs_functions.cpp | 4 +-- src/client/rpc/ld_rpc_metadentry.cpp | 36 
++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/client/rpc/ld_rpc_metadentry.hpp b/include/client/rpc/ld_rpc_metadentry.hpp index 1abb33086..fe260a5ea 100644 --- a/include/client/rpc/ld_rpc_metadentry.hpp +++ b/include/client/rpc/ld_rpc_metadentry.hpp @@ -29,7 +29,7 @@ int mk_node(const std::string& path, mode_t mode); int stat(const std::string& path, std::string& attr); -int rm_node(const std::string& path, const bool remove_metadentry_only); +int rm_node(const std::string& path, const bool remove_metadentry_only, const ssize_t size); int decr_size(const std::string& path, size_t length); diff --git a/src/client/adafs_functions.cpp b/src/client/adafs_functions.cpp index 2331aeab1..47dc1c4bf 100644 --- a/src/client/adafs_functions.cpp +++ b/src/client/adafs_functions.cpp @@ -173,7 +173,7 @@ int adafs_rm_node(const std::string& path) { return -1; } bool has_data = S_ISREG(md->mode()) && (md->size() != 0); - return rpc_send::rm_node(path, !has_data); + return rpc_send::rm_node(path, !has_data, md->size()); } int adafs_access(const std::string& path, const int mask, bool follow_links) { @@ -517,7 +517,7 @@ int adafs_rmdir(const std::string& path) { errno = ENOTEMPTY; return -1; } - return rpc_send::rm_node(path, true); + return rpc_send::rm_node(path, true, 0); } diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index bdac7bfa4..b41faf8a9 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -118,7 +118,7 @@ int decr_size(const std::string& path, size_t length) { } } -int rm_node(const std::string& path, const bool remove_metadentry_only) { +int rm_node(const std::string& path, const bool remove_metadentry_only, const ssize_t size) { // if only the metadentry should be removed, send one rpc to the // metadentry's responsible node to remove the metadata @@ -159,6 +159,35 @@ int rm_node(const std::string& path, const bool 
remove_metadentry_only) { std::vector> handles; + if ((size / CHUNKSIZE) < CTX->hosts().size()) { // Small files + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + + try { + CTX->log()->trace("{}() Sending RPC to host: {}", + __func__, endp.to_string()); + gkfs::rpc::remove::input in(path); + handles.emplace_back(ld_network_service->post(endp,in)); + + auto chnk_start = 0; + auto chnk_end = size/CHUNKSIZE; + + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { + auto target = CTX->hosts().at(CTX->distributor()->locate_data(path, chnk_id)); + + CTX->log()->trace("{}() Sending RPC to host: {}", + __func__, target.to_string()); + + handles.emplace_back( + ld_network_service->post(target, in)); + } + } catch (const std::exception & ex) { + CTX->log()->error("{}() Failed to send reduced remove requests", + __func__); + throw std::runtime_error("Failed to forward non-blocking rpc request"); + } + } + else { // "Big" files for (const auto& endp : CTX->hosts()) { try { CTX->log()->trace("{}() Sending RPC to host: {}", @@ -171,6 +200,9 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { // TODO(amiranda): hermes will eventually provide a post(endpoint) // returning one result and a broadcast(endpoint_set) returning a // result_set. 
When that happens we can remove the .at(0) :/ + // + // + handles.emplace_back( ld_network_service->post(endp, in)); @@ -182,7 +214,7 @@ int rm_node(const std::string& path, const bool remove_metadentry_only) { throw std::runtime_error("Failed to forward non-blocking rpc request"); } } - + } // wait for RPC responses bool got_error = false; -- GitLab From b36494f320e211ee2b91117ca5a54621f834d1c4 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 10 Oct 2019 14:58:29 +0200 Subject: [PATCH 37/71] Cherry pick getdents64() implementation --- include/client/adafs_functions.hpp | 4 ++ include/client/hooks.hpp | 2 +- src/client/adafs_functions.cpp | 63 +++++++++++++++++++++++++++++- src/client/hooks.cpp | 10 +++++ src/client/intercept.cpp | 6 +++ 5 files changed, 82 insertions(+), 3 deletions(-) diff --git a/include/client/adafs_functions.hpp b/include/client/adafs_functions.hpp index 2c304f616..6e82913e8 100644 --- a/include/client/adafs_functions.hpp +++ b/include/client/adafs_functions.hpp @@ -69,6 +69,10 @@ int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count); +int getdents64(unsigned int fd, + struct linux_dirent64 *dirp, + unsigned int count); + int adafs_rmdir(const std::string& path); #endif //IFS_ADAFS_FUNCTIONS_HPP diff --git a/include/client/hooks.hpp b/include/client/hooks.hpp index 98c0aae94..4e978a795 100644 --- a/include/client/hooks.hpp +++ b/include/client/hooks.hpp @@ -17,7 +17,6 @@ #include #include - int hook_openat(int dirfd, const char *cpath, int flags, mode_t mode); int hook_close(int fd); int hook_stat(const char* path, struct stat* buf); @@ -42,6 +41,7 @@ int hook_dup(unsigned int fd); int hook_dup2(unsigned int oldfd, unsigned int newfd); int hook_dup3(unsigned int oldfd, unsigned int newfd, int flags); int hook_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count); +int hook_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count); int hook_mkdirat(int dirfd, const char * 
cpath, mode_t mode); int hook_fchmodat(int dirfd, const char* path, mode_t mode); int hook_fchmod(unsigned int dirfd, mode_t mode); diff --git a/src/client/adafs_functions.cpp b/src/client/adafs_functions.cpp index 2331aeab1..5834e94f3 100644 --- a/src/client/adafs_functions.cpp +++ b/src/client/adafs_functions.cpp @@ -23,8 +23,6 @@ #include #include -#include - #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) @@ -37,6 +35,14 @@ struct linux_dirent { char d_name[1]; }; +struct linux_dirent64 { + unsigned long long d_ino; + unsigned long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[1]; +}; + using namespace std; int adafs_open(const std::string& path, mode_t mode, int flags) { @@ -572,6 +578,59 @@ int getdents(unsigned int fd, return written; } + +int getdents64(unsigned int fd, + struct linux_dirent64 *dirp, + unsigned int count) { + CTX->log()->trace("{}() called on fd: {}, count {}", __func__, fd, count); + auto open_dir = CTX->file_map()->get_dir(fd); + if(open_dir == nullptr){ + //Cast did not succeeded: open_file is a regular file + errno = EBADF; + return -1; + } + + auto pos = open_dir->pos(); + if (pos >= open_dir->size()) { + return 0; + } + + unsigned int written = 0; + struct linux_dirent64 * current_dirp = nullptr; + while(pos < open_dir->size()) { + DirEntry de = open_dir->getdent(pos); + auto total_size = ALIGN(offsetof(struct linux_dirent64, d_name) + + de.name().size() + 3, sizeof(long)); + if (total_size > (count - written)) { + //no enough space left on user buffer to insert next dirent + break; + } + current_dirp = reinterpret_cast( + reinterpret_cast(dirp) + written); + current_dirp->d_ino = std::hash()( + open_dir->path() + "/" + de.name()); + + current_dirp->d_reclen = total_size; + current_dirp->d_type = ((de.type() == FileType::regular)? 
DT_REG : DT_DIR); + + + + CTX->log()->trace("{}() name {}: {}", __func__, pos, de.name()); + std::strcpy(&(current_dirp->d_name[0]), de.name().c_str()); + ++pos; + current_dirp->d_off = pos; + written += total_size; + } + + if (written == 0) { + errno = EINVAL; + return -1; + } + open_dir->pos(pos); + return written; +} + + #ifdef HAS_SYMLINKS int adafs_mk_symlink(const std::string& path, const std::string& target_path) { diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index a426fdad4..40f5d254f 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -354,6 +354,16 @@ int hook_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count return syscall_no_intercept(SYS_getdents, fd, dirp, count); } + +int hook_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count) { + CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + if (CTX->file_map()->exist(fd)) { + return with_errno(getdents64(fd, dirp, count)); + } + return syscall_no_intercept(SYS_getdents64, fd, dirp, count); +} + + int hook_mkdirat(int dirfd, const char * cpath, mode_t mode) { CTX->log()->trace("{}() called with fd: {}, path: {}, mode: {}", __func__, dirfd, cpath, mode); diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 3f17a41af..530553b98 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -422,6 +422,12 @@ int hook(long syscall_number, static_cast(arg2)); break; + case SYS_getdents64: + *result = hook_getdents64(static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + break; + case SYS_mkdirat: *result = hook_mkdirat(static_cast(arg0), reinterpret_cast(arg1), -- GitLab From 59e08a5d5dac8f5206a7e97eb0e5865706e7992c Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 11:23:32 +0100 Subject: [PATCH 38/71] Update hermes --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 38b6bbfe7..2e578554d 
160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 38b6bbfe77806b527f4e4b4157feea658f555be6 +Subproject commit 2e578554d52d734eec83b5def2602dde7b6ce570 -- GitLab From 9ab384e8ce12c29181546e86423ec1a884af0ffe Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 11:46:18 +0100 Subject: [PATCH 39/71] Replace spdlog with internal logging framework --- CMakeLists.txt | 19 +- include/client/env.hpp | 37 + include/client/intercept.hpp | 7 + include/client/logging.hpp | 495 ++++++++++++ include/client/make_array.hpp | 35 + include/client/preload_context.hpp | 19 +- include/client/syscall_names.hpp | 353 --------- include/client/syscalls.hpp | 21 + include/client/syscalls/args.hpp | 716 ++++++++++++++++++ include/client/syscalls/decoder.hpp | 110 +++ include/client/syscalls/detail/syscall_info.h | 80 ++ include/client/syscalls/errno.hpp | 581 ++++++++++++++ include/client/syscalls/rets.hpp | 136 ++++ include/client/syscalls/syscall.hpp | 207 +++++ include/daemon/env.hpp | 34 + include/global/configure.hpp.in | 7 +- include/global/env_util.hpp | 15 +- scripts/compile_dep.sh | 283 ++++--- scripts/dl_dep.sh | 2 + src/client/CMakeLists.txt | 15 +- src/client/adafs_functions.cpp | 88 +-- src/client/hooks.cpp | 240 ++++-- src/client/intercept.cpp | 471 ++++++++---- src/client/logging.cpp | 310 ++++++++ src/client/open_file_map.cpp | 5 +- src/client/preload.cpp | 144 ++-- src/client/preload_context.cpp | 118 ++- src/client/preload_util.cpp | 37 +- src/client/resolve.cpp | 55 +- src/client/rpc/ld_rpc_data_ws.cpp | 64 +- src/client/rpc/ld_rpc_management.cpp | 9 +- src/client/rpc/ld_rpc_metadentry.cpp | 163 ++-- src/client/syscalls/detail/syscall_info.c | 498 ++++++++++++ src/daemon/main.cpp | 12 +- src/global/env_util.cpp | 27 +- 35 files changed, 4395 insertions(+), 1018 deletions(-) create mode 100644 include/client/env.hpp create mode 100644 include/client/logging.hpp create mode 100644 include/client/make_array.hpp delete mode 
100644 include/client/syscall_names.hpp create mode 100644 include/client/syscalls.hpp create mode 100644 include/client/syscalls/args.hpp create mode 100644 include/client/syscalls/decoder.hpp create mode 100644 include/client/syscalls/detail/syscall_info.h create mode 100644 include/client/syscalls/errno.hpp create mode 100644 include/client/syscalls/rets.hpp create mode 100644 include/client/syscalls/syscall.hpp create mode 100644 include/daemon/env.hpp create mode 100644 src/client/logging.cpp create mode 100644 src/client/syscalls/detail/syscall_info.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 31e8231c9..ae28bd595 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,9 +27,13 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") # Compiler flags for various cmake build types set(WARNINGS_FLAGS "-Wall -Wextra --pedantic -Wno-unused-parameter -Wno-missing-field-initializers") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -O3") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0 -DGKFS_DEBUG_BUILD") set(CMAKE_CXX_FLAGS_MEMCHECK "${WARNINGS_FLAGS} -g -O0 -fsanitize=address -fno-omit-frame-pointer") set(CMAKE_CXX_FLAGS_MAINTAINER "${WARNINGS_FLAGS} -g -O0 -pg -no-pie") +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -DNDEBUG -O3") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${WARNINGS_FLAGS} -g -O0 -DGKFS_DEBUG_BUILD") +set(CMAKE_C_FLAGS_MEMCHECK "${WARNINGS_FLAGS} -g -O0 -fsanitize=address -fno-omit-frame-pointer") +set(CMAKE_C_FLAGS_MAINTAINER "${WARNINGS_FLAGS} -g -O0 -pg -no-pie") mark_as_advanced(CMAKE_CXX_FLAGS_MAINTAINER) # Project version @@ -94,6 +98,8 @@ find_package(Boost 1.53 REQUIRED find_package(Threads REQUIRED) +find_package(Date REQUIRED) + set(RPC_PROTOCOL "ofi+tcp" CACHE STRING "Communication plugin used for RPCs") set_property(CACHE RPC_PROTOCOL PROPERTY STRINGS "bmi+tcp" @@ -112,6 +118,10 @@ 
if(SYMLINK_SUPPORT) endif() message(STATUS "Symlink support: ${SYMLINK_SUPPORT}") +set(MAX_INTERNAL_FDS 256 CACHE STRING "Number of file descriptors reserved for internal use") +add_definitions(-DMAX_INTERNAL_FDS=${MAX_INTERNAL_FDS}) +message(STATUS "File descriptors reserved for internal use: ${MAX_INTERNAL_FDS}") + option(USE_BITSET_FOR_INTERNAL_FDS "Use std::bitset to track internal fds" ON) if(USE_BITSET_FOR_INTERNAL_FDS) add_definitions(-DUSE_BITSET_FOR_INTERNAL_FDS) @@ -131,6 +141,13 @@ if(USE_BITSET_FOR_INTERNAL_FDS) message(STATUS "Max open files: ${GETCONF_MAX_FDS}") endif() + +option(ENABLE_LOGGING "Disable all logging messages" ON) +if(NOT ENABLE_LOGGING) + add_definitions(-DGKFS_DISABLE_LOGGING) +endif() +message(STATUS "Logging output: ${ENABLE_LOGGING}") + configure_file(include/global/configure.hpp.in include/global/configure.hpp) # Imported target diff --git a/include/client/env.hpp b/include/client/env.hpp new file mode 100644 index 000000000..61f92981b --- /dev/null +++ b/include/client/env.hpp @@ -0,0 +1,37 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_CLIENT_ENV +#define GKFS_CLIENT_ENV + +#include + +#define ADD_PREFIX(str) CLIENT_ENV_PREFIX str + +/* Environment variables for the GekkoFS client */ +namespace gkfs { +namespace env { + +static constexpr auto LOG = ADD_PREFIX("LOG"); +static constexpr auto LOG_OUTPUT = ADD_PREFIX("LOG_OUTPUT"); +static constexpr auto LOG_OUTPUT_TRUNC = ADD_PREFIX("LOG_OUTPUT_TRUNC"); +static constexpr auto CWD = ADD_PREFIX("CWD"); +static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); + +} // namespace env +} // namespace gkfs + +#undef ADD_PREFIX + +#endif // GKFS_CLIENT_ENV + diff --git a/include/client/intercept.hpp b/include/client/intercept.hpp index 9ba21eeb5..f3b590d92 100644 --- a/include/client/intercept.hpp +++ b/include/client/intercept.hpp @@ -14,12 +14,19 @@ #ifndef IFS_INTERCEPT_HPP #define IFS_INTERCEPT_HPP +int +internal_hook_guard_wrapper(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *syscall_return_value); + int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long *syscall_return_value); +void start_self_interception(); void start_interception(); void stop_interception(); diff --git a/include/client/logging.hpp b/include/client/logging.hpp new file mode 100644 index 000000000..04fbdd032 --- /dev/null +++ b/include/client/logging.hpp @@ -0,0 +1,495 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef LIBGKFS_LOGGING_HPP +#define LIBGKFS_LOGGING_HPP + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace gkfs { +namespace log { + +enum class log_level : short { + print_syscalls = 1 << 0, + print_syscalls_entry = 1 << 1, + print_info = 1 << 2, + print_critical = 1 << 3, + print_errors = 1 << 4, + print_warnings = 1 << 5, + print_mercury = 1 << 6, + print_debug = 1 << 7, + + // for internal use + print_none = 0, + print_all = print_syscalls | print_syscalls_entry | print_info | + print_critical | print_errors | print_warnings | + print_mercury | print_debug, + print_most = print_all & ~print_syscalls_entry, + print_help = 1 << 10 +}; + +inline constexpr log_level +operator&(log_level l1, log_level l2) { + return log_level(static_cast(l1) & + static_cast(l2)); +} + +inline constexpr log_level +operator|(log_level l1, log_level l2) { + return log_level(static_cast(l1) | + static_cast(l2)); +} + +inline constexpr log_level +operator^(log_level l1, log_level l2) { + return log_level(static_cast(l1) ^ + static_cast(l2)); +} + +inline constexpr log_level +operator~(log_level l1) { + return log_level(~static_cast(l1)); +} + +inline constexpr bool +operator!(log_level dm) { + return static_cast(dm) == 0; +} + +inline const log_level& +operator|=(log_level& l1, log_level l2) { + return l1 = l1 | l2; +} + +inline const log_level& +operator&=(log_level& l1, log_level l2) { + return l1 = l1 & l2; +} + +inline const log_level& +operator^=(log_level& l1, log_level l2) { + return l1 = l1 ^ l2; +} + + +static const auto constexpr syscall = log_level::print_syscalls; +static const auto constexpr syscall_at_entry = log_level::print_syscalls_entry; +static const auto constexpr info = log_level::print_info; +static const auto constexpr critical = log_level::print_critical; +static const auto constexpr error = log_level::print_errors; +static const auto constexpr warning = 
log_level::print_warnings; +static const auto constexpr mercury = log_level::print_mercury; +static const auto constexpr debug = log_level::print_debug; +static const auto constexpr none = log_level::print_none; +static const auto constexpr most = log_level::print_most; +static const auto constexpr all = log_level::print_all; +static const auto constexpr help = log_level::print_help; + +static const auto constexpr level_names = + utils::make_array( + "syscall", + "syscall", // sycall_entry uses the same name as syscall + "info", + "critical", + "error", + "warning", + "mercury", + "debug" +); + +inline constexpr auto +lookup_level_name(log_level l) { + + assert(l != log::none && l != log::help); + + // since all log levels are powers of 2, we can find a name + // very efficiently by counting the number of trailing 0-bits in l + const auto i = __builtin_ctz(static_cast(l)); + assert(i >= 0 && static_cast(i) < level_names.size()); + + return level_names.at(i); +} + + +// forward declaration +struct logger; + +namespace detail { + +enum { inline_buffer_size = 0x1000 }; + +using safe_buffer = fmt::basic_memory_buffer; + +template +static inline void +log_buffer(std::FILE* fp, + Buffer&& buffer) { + log_buffer(::fileno(fp), std::forward(buffer)); +} + +template +static inline void +log_buffer(int fd, + Buffer&& buffer) { + + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + + ::syscall_no_intercept(SYS_write, fd, buffer.data(), buffer.size()); +} + +static inline void +log_buffer(int fd, + const void* buffer, + std::size_t length) { + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + + ::syscall_no_intercept(SYS_write, fd, buffer, length); +} + +/** + * format_timestamp_to - safely format a timestamp for logging messages + * + * This function produes a timestamp that can be used to prefix logging + * messages. 
Since we are actively intercepting system calls, the formatting + * MUST NOT rely on internal system calls, otherwise we risk recursively + * calling ourselves for each syscall generated. Also, we cannot rely on + * the C formatting functions asctime, ctime, gmtime, localtime, mktime, + * asctime_r, ctime_r, gmtime_r, localtime_r, since they acquire a + * non-reentrant lock to determine the caller's timezone (yes, the assumedly + * reentrant *_r versions of the functions exhibit this problem as well, + * see https://sourceware.org/bugzilla/show_bug.cgi?id=16145). To solve this + * issue and still get readable timestamps, we determine and cache the + * timezone when the logger is created so that the lock is only held once, by + * one thread exactly, and we pass it as an argument whenever we need to + * format a timestamp. If no timezone is provided, we just format the epoch. + * + * NOTE: we use the date C++ library to query the timezone database and + * to format the timestamps. + */ +template +static inline void +format_timestamp_to(Buffer&& buffer, + const date::time_zone * const timezone = nullptr) { + + struct ::timeval tv; + + int rv = ::syscall_no_intercept(SYS_gettimeofday, &tv, NULL); + + if(::syscall_error_code(rv) != 0) { + return; + } + + date::sys_time now{ + std::chrono::seconds{tv.tv_sec} + + std::chrono::microseconds{tv.tv_usec}}; + + if(!timezone) { + fmt::format_to(buffer, "[{}] ", now.time_since_epoch().count()); + return; + } + + fmt::format_to(buffer, "[{}] ", + date::zoned_time{timezone, now}); +} + +template +static inline void +format_syscall_info_to(Buffer&& buffer, + gkfs::syscall::info info) { + + const auto ttid = syscall_no_intercept(SYS_gettid); + fmt::format_to(buffer, "[{}] [syscall] ", ttid); + + char o; + char t; + + switch(gkfs::syscall::origin(info)) { + case gkfs::syscall::from_internal_code: + o = 'i'; + break; + case gkfs::syscall::from_external_code: + o = 'a'; + break; + default: + o = '?'; + break; + } + + 
switch(gkfs::syscall::target(info)) { + case gkfs::syscall::to_hook: + t = 'h'; + break; + case gkfs::syscall::to_kernel: + t = 'k'; + break; + default: + t = '?'; + break; + } + + const std::array tmp = {'[', o, t, ']', ' '}; + fmt::format_to(buffer, fmt::string_view(tmp.data(), tmp.size())); +} + + + +} // namespace detail + +struct logger { + + logger(const std::string& opts, + const std::string& path, + bool trunc); + + ~logger(); + + template + inline void + log(log_level level, + const char * const func, + const int lineno, + Args&&... args) { + + if(!(level & log_mask_)) { + return; + } + + detail::safe_buffer buffer; + detail::format_timestamp_to(buffer, timezone_); + fmt::format_to(buffer, "[{}] [{}] ", + ::syscall_no_intercept(SYS_gettid), + lookup_level_name(level)); + + if(!!(level & log::debug)) { + fmt::format_to(buffer, "<{}():{}> ", func, lineno); + } + + fmt::format_to(buffer, std::forward(args)...); + fmt::format_to(buffer, "\n"); + detail::log_buffer(log_fd_, buffer); + } + + inline int + log(log_level level, + const char *fmt, + va_list ap) { + + if(!(level & log_mask_)) { + return 0; + } + + // we use buffer views to compose the logging messages to + // avoid copying buffers as much as possible + struct buffer_view { + const void* addr; + std::size_t size; + }; + + // helper lambda to print an iterable of buffer_views + const auto log_buffer_views = + [this](const auto& buffers) { + + std::size_t n = 0; + + for(const auto& bv : buffers) { + if(bv.addr != nullptr) { + detail::log_buffer(log_fd_, bv.addr, bv.size); + n += bv.size; + } + } + + return n; + }; + + + char buffer[detail::inline_buffer_size]; + + detail::safe_buffer prefix; + detail::format_timestamp_to(prefix); + fmt::format_to(prefix, "[{}] [{}] ", + ::syscall_no_intercept(SYS_gettid), + lookup_level_name(level)); + + const int n = vsnprintf(buffer, sizeof(buffer), fmt, ap); + + std::array buffers{}; + + int i = 0; + int m = 0; + const char* addr = buffer; + const char* p = nullptr; 
+ while((p = std::strstr(addr, "\n")) != nullptr) { + buffers[0] = buffer_view{prefix.data(), prefix.size()}; + buffers[1] = buffer_view{addr, static_cast(p - addr) + 1}; + + m += log_buffer_views(buffers); + addr = p + 1; + ++i; + } + + // original line might not end with (or include) '\n' + if(buffer[n-1] != '\n') { + buffers[0] = buffer_view{prefix.data(), prefix.size()}; + buffers[1] = buffer_view{addr, static_cast(&buffer[n] - addr)}; + buffers[2] = buffer_view{"\n", 1}; + + m += log_buffer_views(buffers); + } + + return m; + } + + template + static inline void + log_message(std::FILE* fp, Args&&... args) { + log_message(::fileno(fp), std::forward(args)...); + } + + template + static inline void + log_message(int fd, Args&&... args) { + + if(fd < 0) { + throw std::runtime_error("Invalid file descriptor"); + } + + detail::safe_buffer buffer; + fmt::format_to(buffer, std::forward(args)...); + fmt::format_to(buffer, "\n"); + detail::log_buffer(fd, buffer); + } + + void + log_syscall(syscall::info info, + const long syscall_number, + const long args[6], + boost::optional result = boost::none); + + static std::shared_ptr& global_logger() { + static std::shared_ptr s_global_logger; + return s_global_logger; + } + + int log_fd_; + log_level log_mask_; + const date::time_zone * const timezone_; +}; + + +// the following static functions can be used to interact +// with a globally registered logger instance + +template +static inline void +create_global_logger(Args&&... args) { + + auto foo = std::make_shared(std::forward(args)...); + logger::global_logger() = foo; + +} + +static inline void +register_global_logger(logger&& lg) { + logger::global_logger() = std::make_shared(std::move(lg)); +} + +static inline std::shared_ptr& +get_global_logger() { + return logger::global_logger(); +} + +static inline void +destroy_global_logger() { + logger::global_logger().reset(); +} + +} // namespace log +} // namespace gkfs + +#define LOG(XXX, ...) 
LOG_##XXX(__VA_ARGS__) + +#ifdef GKFS_DISABLE_LOGGING + +#define LOG_INFO(...) do {} while(0); +#define LOG_WARNING(...) do {} while(0); +#define LOG_ERROR(...) do {} while(0); +#define LOG_CRITICAL(...) do {} while(0); +#define LOG_SYSCALL(...) do {} while(0); +#define LOG_DEBUG(...) do {} while(0); + +#else // !GKFS_DISABLE_LOGGING + +#define LOG_INFO(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::info, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#define LOG_WARNING(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::warning, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#define LOG_ERROR(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::error, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#define LOG_CRITICAL(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::critical, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#ifdef GKFS_DEBUG_BUILD + +#define LOG_SYSCALL(...) do { \ +if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log_syscall(__VA_ARGS__); \ + } \ +} while(0); + +#define LOG_DEBUG(...) do { \ + if(gkfs::log::get_global_logger()) { \ + gkfs::log::get_global_logger()->log( \ + gkfs::log::debug, __func__, __LINE__, __VA_ARGS__); \ + } \ +} while(0); + +#else // ! GKFS_DEBUG_BUILD + +#define LOG_SYSCALL(...) do {} while(0); +#define LOG_DEBUG(...) do {} while(0); + +#endif // ! 
GKFS_DEBUG_BUILD +#endif // !GKFS_DISABLE_LOGGING + +#endif // LIBGKFS_LOGGING_HPP diff --git a/include/client/make_array.hpp b/include/client/make_array.hpp new file mode 100644 index 000000000..0c1a84e0a --- /dev/null +++ b/include/client/make_array.hpp @@ -0,0 +1,35 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef LIBGKFS_UTILS_MAKE_ARRAY_HPP +#define LIBGKFS_UTILS_MAKE_ARRAY_HPP + +namespace gkfs { +namespace utils { + +template +constexpr auto make_array(T&&... values) -> + std::array< + typename std::decay< + typename std::common_type::type>::type, + sizeof...(T)> { + return std::array< + typename std::decay< + typename std::common_type::type>::type, + sizeof...(T)>{std::forward(values)...}; +} + +} // namespace utils +} // namespace gkfs + +#endif // LIBGKFS_UTILS_MAKE_ARRAY_HPP diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 16ad7f610..32e1fced3 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -15,7 +15,6 @@ #define IFS_PRELOAD_CTX_HPP #include -#include #include #include #include @@ -30,6 +29,10 @@ class OpenFileMap; class Distributor; +namespace gkfs { namespace log { + struct logger; +}} + struct FsConfig { // configurable metadata @@ -54,10 +57,13 @@ enum class RelativizeStatus { }; class PreloadContext { + + static const auto constexpr INTERNAL_FD_BASE = + MAX_OPEN_FDS - MAX_INTERNAL_FDS; + private: PreloadContext(); - std::shared_ptr log_; std::shared_ptr ofm_; std::shared_ptr distributor_; std::shared_ptr fs_conf_; @@ -72,7 +78,8 @@ class PreloadContext { bool interception_enabled_; #ifdef 
USE_BITSET_FOR_INTERNAL_FDS - std::bitset internal_fds_; + std::bitset internal_fds_; + mutable std::mutex internal_fds_mutex_; #else std::set internal_fds_; #endif // USE_BITSET_FOR_INTERNAL_FDS @@ -87,9 +94,7 @@ class PreloadContext { PreloadContext(PreloadContext const&) = delete; void operator=(PreloadContext const&) = delete; - void log(std::shared_ptr logger); - std::shared_ptr log() const; - + void init_logging(); void mountdir(const std::string& path); const std::string& mountdir() const; const std::vector& mountdir_components() const; @@ -121,7 +126,7 @@ class PreloadContext { void disable_interception(); bool interception_enabled() const; - void register_internal_fd(int fd); + int register_internal_fd(int fd); void unregister_internal_fd(int fd); bool is_internal_fd(int fd) const; }; diff --git a/include/client/syscall_names.hpp b/include/client/syscall_names.hpp deleted file mode 100644 index 823d859a5..000000000 --- a/include/client/syscall_names.hpp +++ /dev/null @@ -1,353 +0,0 @@ -/* - Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain - Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany - - This software was partially supported by the - EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). - - This software was partially supported by the - ADA-FS project under the SPPEXA project funded by the DFG. 
- - SPDX-License-Identifier: MIT -*/ - -#ifndef IFS_SYSCALL_NAMES_HPP -#define IFS_SYSCALL_NAMES_HPP - -const char* syscall_names[] = { -"read", -"write", -"open", -"close", -"stat", -"fstat", -"lstat", -"poll", -"lseek", -"mmap", -"mprotect", -"munmap", -"brk", -"rt_sigaction", -"rt_sigprocmask", -"rt_sigreturn", -"ioctl", -"pread64", -"pwrite64", -"readv", -"writev", -"access", -"pipe", -"select", -"sched_yield", -"mremap", -"msync", -"mincore", -"madvise", -"shmget", -"shmat", -"shmctl", -"dup", -"dup2", -"pause", -"nanosleep", -"getitimer", -"alarm", -"setitimer", -"getpid", -"sendfile", -"socket", -"connect", -"accept", -"sendto", -"recvfrom", -"sendmsg", -"recvmsg", -"shutdown", -"bind", -"listen", -"getsockname", -"getpeername", -"socketpair", -"setsockopt", -"getsockopt", -"clone", -"fork", -"vfork", -"execve", -"exit", -"wait4", -"kill", -"uname", -"semget", -"semop", -"semctl", -"shmdt", -"msgget", -"msgsnd", -"msgrcv", -"msgctl", -"fcntl", -"flock", -"fsync", -"fdatasync", -"truncate", -"ftruncate", -"getdents", -"getcwd", -"chdir", -"fchdir", -"rename", -"mkdir", -"rmdir", -"creat", -"link", -"unlink", -"symlink", -"readlink", -"chmod", -"fchmod", -"chown", -"fchown", -"lchown", -"umask", -"gettimeofday", -"getrlimit", -"getrusage", -"sysinfo", -"times", -"ptrace", -"getuid", -"syslog", -"getgid", -"setuid", -"setgid", -"geteuid", -"getegid", -"setpgid", -"getppid", -"getpgrp", -"setsid", -"setreuid", -"setregid", -"getgroups", -"setgroups", -"setresuid", -"getresuid", -"setresgid", -"getresgid", -"getpgid", -"setfsuid", -"setfsgid", -"getsid", -"capget", -"capset", -"rt_sigpending", -"rt_sigtimedwait", -"rt_sigqueueinfo", -"rt_sigsuspend", -"sigaltstack", -"utime", -"mknod", -"uselib", -"personality", -"ustat", -"statfs", -"fstatfs", -"sysfs", -"getpriority", -"setpriority", -"sched_setparam", -"sched_getparam", -"sched_setscheduler", -"sched_getscheduler", -"sched_get_priority_max", -"sched_get_priority_min", -"sched_rr_get_interval", -"mlock", 
-"munlock", -"mlockall", -"munlockall", -"vhangup", -"modify_ldt", -"pivot_root", -"_sysctl", -"prctl", -"arch_prctl", -"adjtimex", -"setrlimit", -"chroot", -"sync", -"acct", -"settimeofday", -"mount", -"umount2", -"swapon", -"swapoff", -"reboot", -"sethostname", -"setdomainname", -"iopl", -"ioperm", -"create_module", -"init_module", -"delete_module", -"get_kernel_syms", -"query_module", -"quotactl", -"nfsservctl", -"getpmsg", -"putpmsg", -"afs_syscall", -"tuxcall", -"security", -"gettid", -"readahead", -"setxattr", -"lsetxattr", -"fsetxattr", -"getxattr", -"lgetxattr", -"fgetxattr", -"listxattr", -"llistxattr", -"flistxattr", -"removexattr", -"lremovexattr", -"fremovexattr", -"tkill", -"time", -"futex", -"sched_setaffinity", -"sched_getaffinity", -"set_thread_area", -"io_setup", -"io_destroy", -"io_getevents", -"io_submit", -"io_cancel", -"get_thread_area", -"lookup_dcookie", -"epoll_create", -"epoll_ctl_old", -"epoll_wait_old", -"remap_file_pages", -"getdents64", -"set_tid_address", -"restart_syscall", -"semtimedop", -"fadvise64", -"timer_create", -"timer_settime", -"timer_gettime", -"timer_getoverrun", -"timer_delete", -"clock_settime", -"clock_gettime", -"clock_getres", -"clock_nanosleep", -"exit_group", -"epoll_wait", -"epoll_ctl", -"tgkill", -"utimes", -"vserver", -"mbind", -"set_mempolicy", -"get_mempolicy", -"mq_open", -"mq_unlink", -"mq_timedsend", -"mq_timedreceive", -"mq_notify", -"mq_getsetattr", -"kexec_load", -"waitid", -"add_key", -"request_key", -"keyctl", -"ioprio_set", -"ioprio_get", -"inotify_init", -"inotify_add_watch", -"inotify_rm_watch", -"migrate_pages", -"openat", -"mkdirat", -"mknodat", -"fchownat", -"futimesat", -"newfstatat", -"unlinkat", -"renameat", -"linkat", -"symlinkat", -"readlinkat", -"fchmodat", -"faccessat", -"pselect6", -"ppoll", -"unshare", -"set_robust_list", -"get_robust_list", -"splice", -"tee", -"sync_file_range", -"vmsplice", -"move_pages", -"utimensat", -"epoll_pwait", -"signalfd", -"timerfd_create", -"eventfd", 
-"fallocate", -"timerfd_settime", -"timerfd_gettime", -"accept4", -"signalfd4", -"eventfd2", -"epoll_create1", -"dup3", -"pipe2", -"inotify_init1", -"preadv", -"pwritev", -"rt_tgsigqueueinfo", -"perf_event_open", -"recvmmsg", -"fanotify_init", -"fanotify_mark", -"prlimit64", -"name_to_handle_at", -"open_by_handle_at", -"clock_adjtime", -"syncfs", -"sendmmsg", -"setns", -"getcpu", -"process_vm_readv", -"process_vm_writev", -"kcmp", -"finit_module", -"sched_setattr", -"sched_getattr", -"renameat2", -"seccomp", -"getrandom", -"memfd_create", -"kexec_file_load", -"bpf", -"execveat", -"userfaultfd", -"membarrier", -"mlock2", -"copy_file_range", -"preadv2", -"pwritev2", -"pkey_mprotect", -"pkey_alloc", -"pkey_free", -"statx" -}; - -#endif diff --git a/include/client/syscalls.hpp b/include/client/syscalls.hpp new file mode 100644 index 000000000..74067a489 --- /dev/null +++ b/include/client/syscalls.hpp @@ -0,0 +1,21 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef LIBGKFS_SYSCALLS_HPP +#define LIBGKFS_SYSCALLS_HPP + +#include +#include +#include + +#endif // LIBGKFS_SYSCALLS_HPP diff --git a/include/client/syscalls/args.hpp b/include/client/syscalls/args.hpp new file mode 100644 index 000000000..f089ef43f --- /dev/null +++ b/include/client/syscalls/args.hpp @@ -0,0 +1,716 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). 
+ + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALLS_ARGS_HPP +#define GKFS_SYSCALLS_ARGS_HPP + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace gkfs { +namespace syscall { +namespace arg { + +/** Allowed arg types (based on the values of the corresponding C enum) */ +enum class type { + none = ::arg_type_t::none, + fd = ::arg_type_t::fd, + atfd = ::arg_type_t::atfd, + cstr = ::arg_type_t::cstr, + open_flags = ::arg_type_t::open_flags, + octal_mode = ::arg_type_t::octal_mode, + ptr = ::arg_type_t::ptr, + dec = ::arg_type_t::dec, + dec32 = ::arg_type_t::dec32, + offset = ::arg_type_t::offset, + whence = ::arg_type_t::whence, + mmap_prot = ::arg_type_t::mmap_prot, + mmap_flags = ::arg_type_t::mmap_flags, + clone_flags = ::arg_type_t::clone_flags, + signum = ::arg_type_t::signum, + sigproc_how = ::arg_type_t::sigproc_how, + generic = ::arg_type_t::arg, +}; + +/* Some constant definitions for convenience */ +static constexpr auto none = type::none; +static constexpr auto fd = type::fd; +static constexpr auto atfd = type::atfd; +static constexpr auto cstr = type::cstr; +static constexpr auto open_flags = type::open_flags; +static constexpr auto octal_mode = type::octal_mode; +static constexpr auto ptr = type::ptr; +static constexpr auto dec = type::dec; +static constexpr auto dec32 = type::dec32; +static constexpr auto offset = type::offset; +static constexpr auto whence = type::whence; +static constexpr auto mmap_prot = type::mmap_prot; +static constexpr auto mmap_flags = type::mmap_flags; +static constexpr auto clone_flags = type::clone_flags; +static constexpr auto signum = type::signum; +static constexpr auto sigproc_how = type::sigproc_how; +static constexpr auto generic = type::generic; + + +/** An argument value with an optional size */ +struct printable_arg { + const char * const 
name; + const long value; + boost::optional size; +}; + + +/** All arg formatters must follow this prototype */ +template +using formatter = + std::add_pointer_t; + + + +/** forward declare formatters */ +template inline void +format_none_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_fd_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_atfd_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_cstr_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_open_flags_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_octal_mode_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_ptr_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_dec_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_dec32_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_whence_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_mmap_prot_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_mmap_flags_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_clone_flags_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_signum_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_sigproc_how_arg_to(FmtBuffer& buffer, const printable_arg& parg); + +template inline void +format_arg_to(FmtBuffer& buffer, const printable_arg& parg); + + +/** Known formatters */ +template +static const constexpr +std::array, arg_type_max> formatters = { + /* [none] = */ format_none_arg_to, + /* [fd] = */ format_fd_arg_to, + /* [atfd] = */ format_atfd_arg_to, + /* [cstr] = */ format_cstr_arg_to, + /* [open_flags] = */ format_open_flags_to, + /* [octal_mode] = */ 
format_octal_mode_to, + /* [ptr] = */ format_ptr_arg_to, + /* [dec] = */ format_dec_arg_to, + /* [dec32] = */ format_dec32_arg_to, + /* [offset] = */ format_dec_arg_to, + /* [whence] = */ format_whence_arg_to, + /* [mmap_prot] = */ format_mmap_prot_arg_to, + /* [mmap_flags] = */ format_mmap_flags_arg_to, + /* [clone_flags] = */ format_clone_flags_arg_to, + /* [signum] = */ format_signum_arg_to, + /* [sigproc_how] = */ format_sigproc_how_arg_to, + /* [arg] = */ format_arg_to, +}; + +/** An argument descriptor */ +struct desc { + arg::type type_; + const char* name_; + + arg::type + type() const { + return type_; + } + + const char* + name() const { + return name_; + } + + template + formatter + formatter() const { + const auto idx = static_cast(type_); + + // if the type is unknown fall back to the default formatter + if(idx < 0 || idx >= static_cast(formatters.size())) { + return format_arg_to; + } + + assert(formatters.at(idx) != nullptr); + + return formatters.at(idx); + } +}; + + +/** Specific formatter implementations follow */ + +/** Flag descriptor */ +typedef struct { + long flag_; + const char * const name_; +} flag_desc; + +#define FLAG_ENTRY(f) flag_desc{ f, #f } + +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +template +static void +format_flag(FmtBuffer& buffer, + long flag, + FlagDescriptorArray&& desc) { + + // we assume that if a flag value is zero, its printable + // name will always be at position 0 in the array + if(flag == 0 && desc[0].flag_ == 0) { + fmt::format_to(buffer, "{}", desc[0].name_); + return; + } + + for(std::size_t i = 0; i < desc.size(); ++i) { + + if(desc[i].name_ == nullptr) { + continue; + } + + if((flag == desc[i].flag_)) { + fmt::format_to(buffer, "{}", desc[i].name_); + return; + } + } + + fmt::format_to(buffer, "{:#x}", flag); +} + +template +static void +format_flag_set(FmtBuffer& buffer, + long flags, + FlagDescriptorArray&& desc) { + + // we assume that if a flag value is 
zero, its printable + // name will always be at position 0 in the array + if(flags == 0 && desc[0].flag_ == 0) { + fmt::format_to(buffer, "{}", desc[0].name_); + return; + } + + std::size_t i = 0; + const auto buffer_start = buffer.size(); + + while(flags != 0 && i < desc.size()) { + + if(desc[i].name_ == nullptr) { + ++i; + continue; + } + + if((flags & desc[i].flag_) != 0) { + fmt::format_to(buffer, "{}{}", + buffer.size() != buffer_start ? "|" : "", + desc[i].name_); + flags &= ~desc[i].flag_; + } + + ++i; + } + + if(flags != 0) { + if(buffer.size() != buffer_start) { + fmt::format_to(buffer, "|"); + } + + fmt::format_to(buffer, "{:#x}", flags); + return; + } + + if(buffer_start == buffer.size()) { + fmt::format_to(buffer, "0x0"); + } +} + +/** + * format_whence_arg_to - format a 'whence' argument + * + * Format a 'whence' argument from the lseek() syscall, modifying the provided + * buffer by appending to it a string representation of the form: + * name = formatted_val + */ +template +inline void +format_whence_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for lseek() whence arg */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(SEEK_SET), + FLAG_ENTRY(SEEK_CUR), + FLAG_ENTRY(SEEK_END) + ); + + fmt::format_to(buffer, "{}=", parg.name); + format_flag_set(buffer, parg.value, flag_names); +} + + +/** + * format_mmap_prot_arg_to - format a 'prot' argument + * + * Format a 'prot' argument (such as those passed to mmap()) + * and append the resulting string to the provided buffer. 
+ */ +template +inline void +format_mmap_prot_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for mmap() prot arg */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(PROT_NONE), + FLAG_ENTRY(PROT_READ), + FLAG_ENTRY(PROT_WRITE), + FLAG_ENTRY(PROT_EXEC)); + + fmt::format_to(buffer, "{}=", parg.name); + format_flag_set(buffer, parg.value, flag_names); + + return; +} + + +/** + * format_mmap_flags_arg_to - format a 'flags' argument + * + * Format a 'flags' argument (such as those passed to mmap()) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_mmap_flags_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for mmap() flags arg */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(MAP_SHARED), + FLAG_ENTRY(MAP_PRIVATE), + FLAG_ENTRY(MAP_SHARED_VALIDATE), + FLAG_ENTRY(MAP_FIXED), + FLAG_ENTRY(MAP_ANONYMOUS), + FLAG_ENTRY(MAP_GROWSDOWN), + FLAG_ENTRY(MAP_DENYWRITE), + FLAG_ENTRY(MAP_EXECUTABLE), + FLAG_ENTRY(MAP_LOCKED), + FLAG_ENTRY(MAP_NORESERVE), + FLAG_ENTRY(MAP_POPULATE), + FLAG_ENTRY(MAP_NONBLOCK), + FLAG_ENTRY(MAP_STACK), + FLAG_ENTRY(MAP_HUGETLB), + FLAG_ENTRY(MAP_SYNC)); + + fmt::format_to(buffer, "{}=", parg.name); + format_flag_set(buffer, parg.value, flag_names); + return; +} + +/** + * format_clone_flags_arg_to - format a 'flags' argument + * + * Format a 'flags' argument (such as those passed to clone()) + * and append the resulting string to the provided buffer. 
+ */ +template +inline void +format_clone_flags_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for clone() flags arg */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(CLONE_VM), + FLAG_ENTRY(CLONE_FS), + FLAG_ENTRY(CLONE_FILES), + FLAG_ENTRY(CLONE_SIGHAND), + FLAG_ENTRY(CLONE_PTRACE), + FLAG_ENTRY(CLONE_VFORK), + FLAG_ENTRY(CLONE_PARENT), + FLAG_ENTRY(CLONE_THREAD), + FLAG_ENTRY(CLONE_NEWNS), + FLAG_ENTRY(CLONE_SYSVSEM), + FLAG_ENTRY(CLONE_SETTLS), + FLAG_ENTRY(CLONE_PARENT_SETTID), + FLAG_ENTRY(CLONE_CHILD_CLEARTID), + FLAG_ENTRY(CLONE_DETACHED), + FLAG_ENTRY(CLONE_UNTRACED), + FLAG_ENTRY(CLONE_CHILD_SETTID), + FLAG_ENTRY(CLONE_NEWCGROUP), + FLAG_ENTRY(CLONE_NEWUTS), + FLAG_ENTRY(CLONE_NEWIPC), + FLAG_ENTRY(CLONE_NEWUSER), + FLAG_ENTRY(CLONE_NEWPID), + FLAG_ENTRY(CLONE_NEWNET), + FLAG_ENTRY(CLONE_IO)); + + fmt::format_to(buffer, "{}=", parg.name); + + // the low byte in clone flags contains the number of the termination + // signal sent to the parent when the child dies + format_flag_set(buffer, parg.value & ~0x11l, flag_names); + + if((parg.value & 0x11l) != 0) { + fmt::format_to(buffer, "|", parg.name); + format_signum_arg_to(buffer, {"", parg.value & 0x11l}); + } + return; +} + +/** + * format_signum_arg_to - format a 'signum' argument + * + * Format a 'signum' argument (such as those passed to rt_sigaction()) + * and append the resulting string to the provided buffer. 
+ */ +template +inline void +format_signum_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for signum args */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(SIGHUP), + FLAG_ENTRY(SIGINT), + FLAG_ENTRY(SIGQUIT), + FLAG_ENTRY(SIGILL), + FLAG_ENTRY(SIGTRAP), + FLAG_ENTRY(SIGABRT), + FLAG_ENTRY(SIGBUS), + FLAG_ENTRY(SIGFPE), + FLAG_ENTRY(SIGKILL), + FLAG_ENTRY(SIGUSR1), + FLAG_ENTRY(SIGSEGV), + FLAG_ENTRY(SIGUSR2), + FLAG_ENTRY(SIGPIPE), + FLAG_ENTRY(SIGALRM), + FLAG_ENTRY(SIGTERM), + FLAG_ENTRY(SIGSTKFLT), + FLAG_ENTRY(SIGCHLD), + FLAG_ENTRY(SIGCONT), + FLAG_ENTRY(SIGSTOP), + FLAG_ENTRY(SIGTSTP), + FLAG_ENTRY(SIGTTIN), + FLAG_ENTRY(SIGTTOU), + FLAG_ENTRY(SIGURG), + FLAG_ENTRY(SIGXCPU), + FLAG_ENTRY(SIGXFSZ), + FLAG_ENTRY(SIGVTALRM), + FLAG_ENTRY(SIGPROF), + FLAG_ENTRY(SIGWINCH), + FLAG_ENTRY(SIGIO), + FLAG_ENTRY(SIGPWR), + FLAG_ENTRY(SIGSYS)); + + if(std::strcmp(parg.name, "")) { + fmt::format_to(buffer, "{}=", parg.name); + } + + format_flag(buffer, parg.value, flag_names); + return; +} + + +/** + * format_sigproc_how_arg_to - format a 'sigproc how' argument + * + * Format a 'sigproc how' argument (such as those passed to sigprocmask()) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_sigproc_how_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for sigproc how args */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(SIG_BLOCK), + FLAG_ENTRY(SIG_UNBLOCK), + FLAG_ENTRY(SIG_SETMASK)); + + fmt::format_to(buffer, "{}=", parg.name); + format_flag(buffer, parg.value, flag_names); + return; +} + +/** + * format_none_arg_to - format a 'none' argument + * + * Format a 'none' argument and append the resulting "void" string to the + * provided buffer. 
+ */ +template +inline void +format_none_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + fmt::format_to(buffer, "void"); +} + + +/** + * format_fd_arg_to - format a 'fd' argument + * + * Format a 'fd' argument (such as those passed to read()) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_fd_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + fmt::format_to(buffer, "{}={}", parg.name, static_cast(parg.value)); +} + + +/** + * format_atfd_arg_to - format a 'at_fd' argument + * + * Format a 'at_fd' argument (such as those passed to openat()) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_atfd_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + if(static_cast(parg.value) == AT_FDCWD) { + fmt::format_to(buffer, "{}=AT_FDCWD", parg.name); + return; + } + + fmt::format_to(buffer, "{}={}", parg.name, static_cast(parg.value)); +} + + +/** + * format_cstr_arg_to - format a 'cstr' argument + * + * Format a 'cstr' argument (i.e. a null-terminated C string) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_cstr_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + if(LIKELY(reinterpret_cast(parg.value) != nullptr)) { + fmt::format_to(buffer, "{}=\"{}\"", parg.name, + reinterpret_cast(parg.value)); + return; + } + + fmt::format_to(buffer, "{}=NULL", parg.name); +} + +/** + * format_open_flags_to - format a 'flags' argument + * + * Format a 'flags' argument (such as those passed to open()) and append + * the resulting string to the provided buffer. 
+ */ +template +inline void +format_open_flags_to(FmtBuffer& buffer, + const printable_arg& parg) { + + /* Names for O_ACCMODE args */ + const auto flag_names = + utils::make_array( + FLAG_ENTRY(O_RDONLY), + FLAG_ENTRY(O_WRONLY), + FLAG_ENTRY(O_RDWR)); + + const auto extra_flag_names = + utils::make_array( +#ifdef O_EXEC + FLAG_ENTRY(O_EXEC), +#endif +#ifdef O_SEARCH + FLAG_ENTRY(O_SEARCH), +#endif + FLAG_ENTRY(O_APPEND), + FLAG_ENTRY(O_CLOEXEC), + FLAG_ENTRY(O_CREAT), + FLAG_ENTRY(O_DIRECTORY), + FLAG_ENTRY(O_DSYNC), + FLAG_ENTRY(O_EXCL), + FLAG_ENTRY(O_NOCTTY), + FLAG_ENTRY(O_NOFOLLOW), + FLAG_ENTRY(O_NONBLOCK), + FLAG_ENTRY(O_RSYNC), + FLAG_ENTRY(O_SYNC), + FLAG_ENTRY(O_TRUNC) +#ifdef O_TTY_INIT + , FLAG_ENTRY(O_TTY_INIT) +#endif + ); + + long flags = parg.value; + + fmt::format_to(buffer, "{}=", parg.name); + format_flag(buffer, flags & O_ACCMODE, flag_names); + + flags &= ~O_ACCMODE; + +#ifdef O_TMPFILE + // processing it with the other flags can result in + // printing O_DIRECTORY when it should not be listed. + // + // See O_TMPFILE' definition in fcntl-linux.h : + // #define __O_TMPFILE (020000000 | __O_DIRECTORY) + if ((flags & O_TMPFILE) == O_TMPFILE) { + format_flag(buffer, O_TMPFILE, flag_names); + flags &= ~O_TMPFILE; + } +#endif // !O_TMPFILE + + if(flags != 0) { + fmt::format_to(buffer, "|", parg.name); + format_flag_set(buffer, flags, extra_flag_names); + } +} + +/** + * format_octal_mode_to - format a 'mode' argument + * + * Format a 'mode' argument (such as those passed to open()) and append the + * generated string to the provided buffer. + */ +template +inline void +format_octal_mode_to(FmtBuffer& buffer, + const printable_arg& parg) { + fmt::format_to(buffer, "{}={:#04o}", parg.name, parg.value); +} + +/** + * format_ptr_arg_to - format a 'ptr' argument + * + * Format a 'ptr' argument (i.e. a C pointer) + * and append the resulting string to the provided buffer. 
+ */ +template +inline void +format_ptr_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + + if(LIKELY(reinterpret_cast(parg.value) != nullptr)) { + fmt::format_to(buffer, "{}={}", parg.name, + reinterpret_cast(parg.value)); + return; + } + + fmt::format_to(buffer, "{}=NULL", parg.name); +} + + +/** + * format_dec_arg_to - format a 'dec' argument + * + * Format a 'dec' argument (i.e. an integer of unknown size) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_dec_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + fmt::format_to(buffer, "{}={}", parg.name, parg.value); +} + + +/** + * format_dec32_arg_to - format a 'dec32' argument + * + * Format a 'dec32' argument (i.e. a 32-bit integer) + * and append the resulting string to the provided buffer. + */ +template +inline void +format_dec32_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + fmt::format_to(buffer, "{}={}", parg.name, static_cast(parg.value)); +} + + +/** + * format_arg_to - format an arbitrary argument + * + * Format an arbitrary argument and append the resulting + * string to the provided buffer. + */ +template +inline void +format_arg_to(FmtBuffer& buffer, + const printable_arg& parg) { + fmt::format_to(buffer, "{}={:#x}", parg.name, parg.value); +} + +#undef FLAG_ENTRY +#undef LIKELY +#undef UNLIKELY + +} // namespace arg +} // namespace syscall +} // namespace gkfs + +#endif // GKFS_SYSCALLS_ARGS_HPP diff --git a/include/client/syscalls/decoder.hpp b/include/client/syscalls/decoder.hpp new file mode 100644 index 000000000..cd69711bd --- /dev/null +++ b/include/client/syscalls/decoder.hpp @@ -0,0 +1,110 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu).
+ + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALLS_DECODER_HPP +#define GKFS_SYSCALLS_DECODER_HPP + +#include +#include +#include + +namespace gkfs { +namespace syscall { + +namespace detail { + +/** a RAII saver/restorer of errno values */ +struct errno_saver { + errno_saver(int errnum) : + saved_errno_(errnum) { } + + ~errno_saver() { + errno = saved_errno_; + } + + const int saved_errno_; +}; + +} // namespace detail + +template +inline void +decode(FmtBuffer& buffer, + const long syscall_number, + const long argv[MAX_ARGS]) { + + detail::errno_saver _(errno); + + const auto sc = lookup_syscall(syscall_number, argv); + + fmt::format_to(buffer, "{}(", sc.name()); + + for(int i = 0; i < sc.num_args(); ++i) { + const auto& arg = sc.args().at(i); + + arg.formatter()(buffer, {arg.name(), argv[i]}); + + if(i < sc.num_args() - 1) { + fmt::format_to(buffer, ", "); + } + } + + fmt::format_to(buffer, ") = ?"); +} + +template +inline void +decode(FmtBuffer& buffer, + const long syscall_number, + const long argv[MAX_ARGS], + const long result) { + + detail::errno_saver _(errno); + + const auto sc = lookup_syscall(syscall_number, argv); + + fmt::format_to(buffer, "{}(", sc.name()); + + for(int i = 0; i < sc.num_args(); ++i) { + const auto& arg = sc.args().at(i); + + arg.formatter()(buffer, {arg.name(), argv[i]}); + + if(i < sc.num_args() - 1) { + fmt::format_to(buffer, ", "); + } + } + + if(never_returns(syscall_number)) { + fmt::format_to(buffer, ") = ?"); + return; + } + + if(error_code(result) != 0) { + fmt::format_to(buffer, ") = {} {} ({})", + static_cast(-1), + errno_name(-result), + errno_message(-result)); + return; + } + + fmt::format_to(buffer, ") = "); + const auto& ret = sc.return_type(); + ret.formatter()(buffer, result); + +} + +} // namespace syscall +} // namespace gkfs + +#endif // GKFS_SYSCALLS_DECODER_HPP diff --git 
a/include/client/syscalls/detail/syscall_info.h b/include/client/syscalls/detail/syscall_info.h new file mode 100644 index 000000000..0ce0c6420 --- /dev/null +++ b/include/client/syscalls/detail/syscall_info.h @@ -0,0 +1,80 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef SYSCALLS_DETAIL_SYSCALL_INFO_H +#define SYSCALLS_DETAIL_SYSCALL_INFO_H + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define MAX_SYSCALL_ARGS 6u + +typedef enum { + none = 0, /* no argument */ + fd, /* fd argument for non *at() syscalls */ + atfd, /* fd argument for *at() syscalls */ + cstr, /* a \0-terminated C string */ + open_flags, /* flags for open/create/mq_open ... */ + octal_mode, /* mode_t arguments */ + ptr, /* pointer arguments */ + dec, /* signed decimal number (aka. long) */ + dec32, /* signed 32-bit decimal number (aka. 
int) */ + offset, /* off_t arguments */ + whence, /* 'whence' argument in lseek-style syscalls */ + mmap_prot, /* protections for the mmap() family of syscalls */ + mmap_flags, /* flags for the mmap() family of syscalls */ + clone_flags, /* flags for the clone() syscall */ + signum, /* signal numbers */ + sigproc_how, /* sigprocmask argument */ + arg, /* generic argument, no special formatting */ + arg_type_max +} arg_type_t; + +typedef struct { + const arg_type_t a_type; + const char * const a_name; +} syscall_arg_t; + +typedef enum { + rnone, + rptr, + rdec, + ret_type_max, +} ret_type_t; + +typedef struct { + const ret_type_t r_type; +} syscall_ret_t; + +struct syscall_info { + const long s_nr; + const char * const s_name; + const int s_nargs; + const syscall_ret_t s_return_type; + const syscall_arg_t s_args[MAX_SYSCALL_ARGS]; +}; + +extern const struct syscall_info syscall_table[]; +extern const struct syscall_info* +get_syscall_info(const long syscall_number, + const long* argv); +extern bool +syscall_never_returns(long); + + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // SYSCALLS_DETAIL_SYSCALL_INFO_H diff --git a/include/client/syscalls/errno.hpp b/include/client/syscalls/errno.hpp new file mode 100644 index 000000000..f1131866b --- /dev/null +++ b/include/client/syscalls/errno.hpp @@ -0,0 +1,581 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALLS_ERRNO_HPP +#define GKFS_SYSCALLS_ERRNO_HPP + +#include +#include + +namespace gkfs { +namespace syscall { + +static const std::array errno_names = { + /* [ 0] = */ NULL, + /* [ 1] = */ "EPERM", + /* [ 2] = */ "ENOENT", + /* [ 3] = */ "ESRCH", + /* [ 4] = */ "EINTR", + /* [ 5] = */ "EIO", + /* [ 6] = */ "ENXIO", + /* [ 7] = */ "E2BIG", + /* [ 8] = */ "ENOEXEC", + /* [ 9] = */ "EBADF", + /* [ 10] = */ "ECHILD", + /* [ 11] = */ "EAGAIN", + /* [ 12] = */ "ENOMEM", + /* [ 13] = */ "EACCES", + /* [ 14] = */ "EFAULT", + /* [ 15] = */ "ENOTBLK", + /* [ 16] = */ "EBUSY", + /* [ 17] = */ "EEXIST", + /* [ 18] = */ "EXDEV", + /* [ 19] = */ "ENODEV", + /* [ 20] = */ "ENOTDIR", + /* [ 21] = */ "EISDIR", + /* [ 22] = */ "EINVAL", + /* [ 23] = */ "ENFILE", + /* [ 24] = */ "EMFILE", + /* [ 25] = */ "ENOTTY", + /* [ 26] = */ "ETXTBSY", + /* [ 27] = */ "EFBIG", + /* [ 28] = */ "ENOSPC", + /* [ 29] = */ "ESPIPE", + /* [ 30] = */ "EROFS", + /* [ 31] = */ "EMLINK", + /* [ 32] = */ "EPIPE", + /* [ 33] = */ "EDOM", + /* [ 34] = */ "ERANGE", + /* [ 35] = */ "EDEADLK", + /* [ 36] = */ "ENAMETOOLONG", + /* [ 37] = */ "ENOLCK", + /* [ 38] = */ "ENOSYS", + /* [ 39] = */ "ENOTEMPTY", + /* [ 40] = */ "ELOOP", + /* [ 41] = */ NULL, + /* [ 42] = */ "ENOMSG", + /* [ 43] = */ "EIDRM", + /* [ 44] = */ "ECHRNG", + /* [ 45] = */ "EL2NSYNC", + /* [ 46] = */ "EL3HLT", + /* [ 47] = */ "EL3RST", + /* [ 48] = */ "ELNRNG", + /* [ 49] = */ "EUNATCH", + /* [ 50] = */ "ENOCSI", + /* [ 51] = */ "EL2HLT", + /* [ 52] = */ "EBADE", + /* [ 53] = */ "EBADR", + /* [ 54] = */ "EXFULL", + /* [ 55] = */ "ENOANO", + /* [ 56] = */ "EBADRQC", + /* [ 57] = */ "EBADSLT", + /* [ 58] = */ NULL, + /* [ 59] = */ "EBFONT", + /* [ 60] = */ "ENOSTR", + /* [ 61] = */ "ENODATA", + /* [ 62] = */ "ETIME", + /* [ 63] = */ "ENOSR", + /* [ 64] = */ "ENONET", + /* [ 65] = */ "ENOPKG", + /* [ 66] = */ "EREMOTE", + /* [ 67] = */ "ENOLINK", + /* [ 68] = */ "EADV", + /* [ 69] = */ 
"ESRMNT", + /* [ 70] = */ "ECOMM", + /* [ 71] = */ "EPROTO", + /* [ 72] = */ "EMULTIHOP", + /* [ 73] = */ "EDOTDOT", + /* [ 74] = */ "EBADMSG", + /* [ 75] = */ "EOVERFLOW", + /* [ 76] = */ "ENOTUNIQ", + /* [ 77] = */ "EBADFD", + /* [ 78] = */ "EREMCHG", + /* [ 79] = */ "ELIBACC", + /* [ 80] = */ "ELIBBAD", + /* [ 81] = */ "ELIBSCN", + /* [ 82] = */ "ELIBMAX", + /* [ 83] = */ "ELIBEXEC", + /* [ 84] = */ "EILSEQ", + /* [ 85] = */ "ERESTART", + /* [ 86] = */ "ESTRPIPE", + /* [ 87] = */ "EUSERS", + /* [ 88] = */ "ENOTSOCK", + /* [ 89] = */ "EDESTADDRREQ", + /* [ 90] = */ "EMSGSIZE", + /* [ 91] = */ "EPROTOTYPE", + /* [ 92] = */ "ENOPROTOOPT", + /* [ 93] = */ "EPROTONOSUPPORT", + /* [ 94] = */ "ESOCKTNOSUPPORT", + /* [ 95] = */ "EOPNOTSUPP", + /* [ 96] = */ "EPFNOSUPPORT", + /* [ 97] = */ "EAFNOSUPPORT", + /* [ 98] = */ "EADDRINUSE", + /* [ 99] = */ "EADDRNOTAVAIL", + /* [100] = */ "ENETDOWN", + /* [101] = */ "ENETUNREACH", + /* [102] = */ "ENETRESET", + /* [103] = */ "ECONNABORTED", + /* [104] = */ "ECONNRESET", + /* [105] = */ "ENOBUFS", + /* [106] = */ "EISCONN", + /* [107] = */ "ENOTCONN", + /* [108] = */ "ESHUTDOWN", + /* [109] = */ "ETOOMANYREFS", + /* [110] = */ "ETIMEDOUT", + /* [111] = */ "ECONNREFUSED", + /* [112] = */ "EHOSTDOWN", + /* [113] = */ "EHOSTUNREACH", + /* [114] = */ "EALREADY", + /* [115] = */ "EINPROGRESS", + /* [116] = */ "ESTALE", + /* [117] = */ "EUCLEAN", + /* [118] = */ "ENOTNAM", + /* [119] = */ "ENAVAIL", + /* [120] = */ "EISNAM", + /* [121] = */ "EREMOTEIO", + /* [122] = */ "EDQUOT", + /* [123] = */ "ENOMEDIUM", + /* [124] = */ "EMEDIUMTYPE", + /* [125] = */ "ECANCELED", + /* [126] = */ "ENOKEY", + /* [127] = */ "EKEYEXPIRED", + /* [128] = */ "EKEYREVOKED", + /* [129] = */ "EKEYREJECTED", + /* [130] = */ "EOWNERDEAD", + /* [131] = */ "ENOTRECOVERABLE", + /* [132] = */ "ERFKILL", + /* [133] = */ "EHWPOISON", + /* [134] = */ NULL, + /* [135] = */ NULL, + /* [136] = */ NULL, + /* [137] = */ NULL, + /* [138] = */ NULL, + /* [139] = */ NULL, + 
/* [140] = */ NULL, + /* [141] = */ NULL, + /* [142] = */ NULL, + /* [143] = */ NULL, + /* [144] = */ NULL, + /* [145] = */ NULL, + /* [146] = */ NULL, + /* [147] = */ NULL, + /* [148] = */ NULL, + /* [149] = */ NULL, + /* [150] = */ NULL, + /* [151] = */ NULL, + /* [152] = */ NULL, + /* [153] = */ NULL, + /* [154] = */ NULL, + /* [155] = */ NULL, + /* [156] = */ NULL, + /* [157] = */ NULL, + /* [158] = */ NULL, + /* [159] = */ NULL, + /* [160] = */ NULL, + /* [161] = */ NULL, + /* [162] = */ NULL, + /* [163] = */ NULL, + /* [164] = */ NULL, + /* [165] = */ NULL, + /* [166] = */ NULL, + /* [167] = */ NULL, + /* [168] = */ NULL, + /* [169] = */ NULL, + /* [170] = */ NULL, + /* [171] = */ NULL, + /* [172] = */ NULL, + /* [173] = */ NULL, + /* [174] = */ NULL, + /* [175] = */ NULL, + /* [176] = */ NULL, + /* [177] = */ NULL, + /* [178] = */ NULL, + /* [179] = */ NULL, + /* [180] = */ NULL, + /* [181] = */ NULL, + /* [182] = */ NULL, + /* [183] = */ NULL, + /* [184] = */ NULL, + /* [185] = */ NULL, + /* [186] = */ NULL, + /* [187] = */ NULL, + /* [188] = */ NULL, + /* [189] = */ NULL, + /* [190] = */ NULL, + /* [191] = */ NULL, + /* [192] = */ NULL, + /* [193] = */ NULL, + /* [194] = */ NULL, + /* [195] = */ NULL, + /* [196] = */ NULL, + /* [197] = */ NULL, + /* [198] = */ NULL, + /* [199] = */ NULL, + /* [200] = */ NULL, + /* [201] = */ NULL, + /* [202] = */ NULL, + /* [203] = */ NULL, + /* [204] = */ NULL, + /* [205] = */ NULL, + /* [206] = */ NULL, + /* [207] = */ NULL, + /* [208] = */ NULL, + /* [209] = */ NULL, + /* [210] = */ NULL, + /* [211] = */ NULL, + /* [212] = */ NULL, + /* [213] = */ NULL, + /* [214] = */ NULL, + /* [215] = */ NULL, + /* [216] = */ NULL, + /* [217] = */ NULL, + /* [218] = */ NULL, + /* [219] = */ NULL, + /* [220] = */ NULL, + /* [221] = */ NULL, + /* [222] = */ NULL, + /* [223] = */ NULL, + /* [224] = */ NULL, + /* [225] = */ NULL, + /* [226] = */ NULL, + /* [227] = */ NULL, + /* [228] = */ NULL, + /* [229] = */ NULL, + /* [230] = */ NULL, 
+ /* [231] = */ NULL, + /* [232] = */ NULL, + /* [233] = */ NULL, + /* [234] = */ NULL, + /* [235] = */ NULL, + /* [236] = */ NULL, + /* [237] = */ NULL, + /* [238] = */ NULL, + /* [239] = */ NULL, + /* [240] = */ NULL, + /* [241] = */ NULL, + /* [242] = */ NULL, + /* [243] = */ NULL, + /* [244] = */ NULL, + /* [245] = */ NULL, + /* [246] = */ NULL, + /* [247] = */ NULL, + /* [248] = */ NULL, + /* [249] = */ NULL, + /* [250] = */ NULL, + /* [251] = */ NULL, + /* [252] = */ NULL, + /* [253] = */ NULL, + /* [254] = */ NULL, + /* [255] = */ NULL, + /* [256] = */ NULL, + /* [257] = */ NULL, + /* [258] = */ NULL, + /* [259] = */ NULL, + /* [260] = */ NULL, + /* [261] = */ NULL, + /* [262] = */ NULL, + /* [263] = */ NULL, + /* [264] = */ NULL, + /* [265] = */ NULL, + /* [266] = */ NULL, + /* [267] = */ NULL, + /* [268] = */ NULL, + /* [269] = */ NULL, + /* [270] = */ NULL, + /* [271] = */ NULL, + /* [272] = */ NULL, + /* [273] = */ NULL, + /* [274] = */ NULL, + /* [275] = */ NULL, + /* [276] = */ NULL, + /* [277] = */ NULL, + /* [278] = */ NULL, + /* [279] = */ NULL, + /* [280] = */ NULL, + /* [281] = */ NULL, + /* [282] = */ NULL, + /* [283] = */ NULL, + /* [284] = */ NULL, + /* [285] = */ NULL, + /* [286] = */ NULL, + /* [287] = */ NULL, + /* [288] = */ NULL, + /* [289] = */ NULL, + /* [290] = */ NULL, + /* [291] = */ NULL, + /* [292] = */ NULL, + /* [293] = */ NULL, + /* [294] = */ NULL, + /* [295] = */ NULL, + /* [296] = */ NULL, + /* [297] = */ NULL, + /* [298] = */ NULL, + /* [299] = */ NULL, + /* [300] = */ NULL, + /* [301] = */ NULL, + /* [302] = */ NULL, + /* [303] = */ NULL, + /* [304] = */ NULL, + /* [305] = */ NULL, + /* [306] = */ NULL, + /* [307] = */ NULL, + /* [308] = */ NULL, + /* [309] = */ NULL, + /* [310] = */ NULL, + /* [311] = */ NULL, + /* [312] = */ NULL, + /* [313] = */ NULL, + /* [314] = */ NULL, + /* [315] = */ NULL, + /* [316] = */ NULL, + /* [317] = */ NULL, + /* [318] = */ NULL, + /* [319] = */ NULL, + /* [320] = */ NULL, + /* [321] = */ 
NULL, + /* [322] = */ NULL, + /* [323] = */ NULL, + /* [324] = */ NULL, + /* [325] = */ NULL, + /* [326] = */ NULL, + /* [327] = */ NULL, + /* [328] = */ NULL, + /* [329] = */ NULL, + /* [330] = */ NULL, + /* [331] = */ NULL, + /* [332] = */ NULL, + /* [333] = */ NULL, + /* [334] = */ NULL, + /* [335] = */ NULL, + /* [336] = */ NULL, + /* [337] = */ NULL, + /* [338] = */ NULL, + /* [339] = */ NULL, + /* [340] = */ NULL, + /* [341] = */ NULL, + /* [342] = */ NULL, + /* [343] = */ NULL, + /* [344] = */ NULL, + /* [345] = */ NULL, + /* [346] = */ NULL, + /* [347] = */ NULL, + /* [348] = */ NULL, + /* [349] = */ NULL, + /* [350] = */ NULL, + /* [351] = */ NULL, + /* [352] = */ NULL, + /* [353] = */ NULL, + /* [354] = */ NULL, + /* [355] = */ NULL, + /* [356] = */ NULL, + /* [357] = */ NULL, + /* [358] = */ NULL, + /* [359] = */ NULL, + /* [360] = */ NULL, + /* [361] = */ NULL, + /* [362] = */ NULL, + /* [363] = */ NULL, + /* [364] = */ NULL, + /* [365] = */ NULL, + /* [366] = */ NULL, + /* [367] = */ NULL, + /* [368] = */ NULL, + /* [369] = */ NULL, + /* [370] = */ NULL, + /* [371] = */ NULL, + /* [372] = */ NULL, + /* [373] = */ NULL, + /* [374] = */ NULL, + /* [375] = */ NULL, + /* [376] = */ NULL, + /* [377] = */ NULL, + /* [378] = */ NULL, + /* [379] = */ NULL, + /* [380] = */ NULL, + /* [381] = */ NULL, + /* [382] = */ NULL, + /* [383] = */ NULL, + /* [384] = */ NULL, + /* [385] = */ NULL, + /* [386] = */ NULL, + /* [387] = */ NULL, + /* [388] = */ NULL, + /* [389] = */ NULL, + /* [390] = */ NULL, + /* [391] = */ NULL, + /* [392] = */ NULL, + /* [393] = */ NULL, + /* [394] = */ NULL, + /* [395] = */ NULL, + /* [396] = */ NULL, + /* [397] = */ NULL, + /* [398] = */ NULL, + /* [399] = */ NULL, + /* [400] = */ NULL, + /* [401] = */ NULL, + /* [402] = */ NULL, + /* [403] = */ NULL, + /* [404] = */ NULL, + /* [405] = */ NULL, + /* [406] = */ NULL, + /* [407] = */ NULL, + /* [408] = */ NULL, + /* [409] = */ NULL, + /* [410] = */ NULL, + /* [411] = */ NULL, + /* [412] = 
*/ NULL, + /* [413] = */ NULL, + /* [414] = */ NULL, + /* [415] = */ NULL, + /* [416] = */ NULL, + /* [417] = */ NULL, + /* [418] = */ NULL, + /* [419] = */ NULL, + /* [420] = */ NULL, + /* [421] = */ NULL, + /* [422] = */ NULL, + /* [423] = */ NULL, + /* [424] = */ NULL, + /* [425] = */ NULL, + /* [426] = */ NULL, + /* [427] = */ NULL, + /* [428] = */ NULL, + /* [429] = */ NULL, + /* [430] = */ NULL, + /* [431] = */ NULL, + /* [432] = */ NULL, + /* [433] = */ NULL, + /* [434] = */ NULL, + /* [435] = */ NULL, + /* [436] = */ NULL, + /* [437] = */ NULL, + /* [438] = */ NULL, + /* [439] = */ NULL, + /* [440] = */ NULL, + /* [441] = */ NULL, + /* [442] = */ NULL, + /* [443] = */ NULL, + /* [444] = */ NULL, + /* [445] = */ NULL, + /* [446] = */ NULL, + /* [447] = */ NULL, + /* [448] = */ NULL, + /* [449] = */ NULL, + /* [450] = */ NULL, + /* [451] = */ NULL, + /* [452] = */ NULL, + /* [453] = */ NULL, + /* [454] = */ NULL, + /* [455] = */ NULL, + /* [456] = */ NULL, + /* [457] = */ NULL, + /* [458] = */ NULL, + /* [459] = */ NULL, + /* [460] = */ NULL, + /* [461] = */ NULL, + /* [462] = */ NULL, + /* [463] = */ NULL, + /* [464] = */ NULL, + /* [465] = */ NULL, + /* [466] = */ NULL, + /* [467] = */ NULL, + /* [468] = */ NULL, + /* [469] = */ NULL, + /* [470] = */ NULL, + /* [471] = */ NULL, + /* [472] = */ NULL, + /* [473] = */ NULL, + /* [474] = */ NULL, + /* [475] = */ NULL, + /* [476] = */ NULL, + /* [477] = */ NULL, + /* [478] = */ NULL, + /* [479] = */ NULL, + /* [480] = */ NULL, + /* [481] = */ NULL, + /* [482] = */ NULL, + /* [483] = */ NULL, + /* [484] = */ NULL, + /* [485] = */ NULL, + /* [486] = */ NULL, + /* [487] = */ NULL, + /* [488] = */ NULL, + /* [489] = */ NULL, + /* [490] = */ NULL, + /* [491] = */ NULL, + /* [492] = */ NULL, + /* [493] = */ NULL, + /* [494] = */ NULL, + /* [495] = */ NULL, + /* [496] = */ NULL, + /* [497] = */ NULL, + /* [498] = */ NULL, + /* [499] = */ NULL, + /* [500] = */ NULL, + /* [501] = */ NULL, + /* [502] = */ NULL, + /* [503] 
= */ NULL, + /* [504] = */ NULL, + /* [505] = */ NULL, + /* [506] = */ NULL, + /* [507] = */ NULL, + /* [508] = */ NULL, + /* [509] = */ NULL, + /* [510] = */ NULL, + /* [511] = */ NULL, + /* [512] = */ "ERESTARTSYS", + /* [513] = */ "ERESTARTNOINTR", + /* [514] = */ "ERESTARTNOHAND", + /* [515] = */ "ENOIOCTLCMD", + /* [516] = */ "ERESTART_RESTARTBLOCK", + /* [517] = */ "EPROBE_DEFER", + /* [518] = */ "EOPENSTALE", + /* [519] = */ NULL, + /* [520] = */ NULL, + /* [521] = */ "EBADHANDLE", + /* [522] = */ "ENOTSYNC", + /* [523] = */ "EBADCOOKIE", + /* [524] = */ "ENOTSUPP", + /* [525] = */ "ETOOSMALL", + /* [526] = */ "ESERVERFAULT", + /* [527] = */ "EBADTYPE", + /* [528] = */ "EJUKEBOX", + /* [529] = */ "EIOCBQUEUED", + /* [530] = */ "ERECALLCONFLICT", +}; + +static inline std::string +errno_name(int errno_value) { + // values outside the table (negative or past its last entry) yield "EUNKNOWN" instead of the std::out_of_range that at() would throw + const auto idx = static_cast<std::size_t>(errno_value); + + if(errno_value < 0 || idx >= errno_names.size() || !errno_names[idx]) { + return "EUNKNOWN"; + } + + return errno_names[idx]; +} + +static inline std::string +errno_message(int errno_value) { + // 1024 should be more than enough for most locales + constexpr const std::size_t MAX_ERROR_MSG = 0x400; + std::array errstr; + char* msg = ::strerror_r(errno_value, errstr.data(), MAX_ERROR_MSG); + return std::string{msg}; +} + +} // namespace syscall +} // namespace gkfs + +#endif // GKFS_SYSCALLS_ERRNO_HPP diff --git a/include/client/syscalls/rets.hpp b/include/client/syscalls/rets.hpp new file mode 100644 index 000000000..bbde7d681 --- /dev/null +++ b/include/client/syscalls/rets.hpp @@ -0,0 +1,136 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG.
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALLS_RETS_HPP +#define GKFS_SYSCALLS_RETS_HPP + +#include +#include +#include + +namespace gkfs { +namespace syscall { +namespace ret { + +/** Allowed ret types (based on the values of the corresponding C enum) */ +enum class type { + none = ::ret_type_t::rnone, + ptr = ::ret_type_t::rptr, + dec = ::ret_type_t::rdec, +}; + +/* Some constant definitions for convenience */ +static constexpr auto none = type::none; +static constexpr auto ptr = type::ptr; +static constexpr auto dec = type::dec; + + +/** All ret formatters must follow this prototype */ +template +using formatter = + std::add_pointer_t; + + +/** forward declare formatters */ +template inline void +format_none_ret_to(FmtBuffer& buffer, long val); + +template inline void +format_ptr_ret_to(FmtBuffer& buffer, long val); + +template inline void +format_dec_ret_to(FmtBuffer& buffer, long val); + +/** Known formatters */ +template +static const constexpr +std::array, ret_type_max> formatters = { + /* [rnone] = */ format_none_ret_to, + /* [rptr] = */ format_ptr_ret_to, + /* [rdec] = */ format_dec_ret_to, +}; + +/** A return value descriptor */ +struct desc { + ret::type type_; + + ret::type + type() const { + return type_; + } + + bool + operator==(ret::type t) const { + return type_ == t; + } + + bool + operator!=(ret::type t) const { + return type_ != t; + } + + template + formatter + formatter() const { + const auto idx = static_cast(type_); + + // if the type is unknown fall back to the default formatter + if(idx < 0 || idx >= static_cast(formatters.size())) { + return format_dec_ret_to; + } + + assert(formatters.at(idx) != nullptr); + + return formatters.at(idx); + } +}; + + +/** Specific formatter implementations follow */ +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +template +inline void +format_none_ret_to(FmtBuffer& buffer, + long val) { + fmt::format_to(buffer, "void"); +} + +template +inline 
void +format_ptr_ret_to(FmtBuffer& buffer, + long val) { + if(LIKELY(reinterpret_cast(val) != nullptr)) { + fmt::format_to(buffer, "{}", reinterpret_cast(val)); + return; + } + + fmt::format_to(buffer, "NULL"); +} + +template +inline void +format_dec_ret_to(FmtBuffer& buffer, + long val) { + fmt::format_to(buffer, "{}", val); +} + +#undef LIKELY +#undef UNLIKELY + +} // namespace ret +} // namespace syscall +} // namespace gkfs + +#endif // GKFS_SYSCALLS_RETS_HPP diff --git a/include/client/syscalls/syscall.hpp b/include/client/syscalls/syscall.hpp new file mode 100644 index 000000000..82b7a2e74 --- /dev/null +++ b/include/client/syscalls/syscall.hpp @@ -0,0 +1,207 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. 
+ + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_SYSCALL_HPP +#define GKFS_SYSCALL_HPP + +#include +#include +#include +#include + +namespace gkfs { +namespace syscall { + +static const auto constexpr MAX_ARGS = 6u; +using arg_list = std::array; + +struct descriptor : private ::syscall_info { + + long + number() const { + return s_nr; + } + + const char* + name() const { + return s_name; + } + + int + num_args() const { + return s_nargs; + } + + arg_list + args() const { + + std::array args; + + for(auto i = 0u; i < MAX_ARGS; ++i) { + args[i] = {static_cast(s_args[i].a_type), + s_args[i].a_name}; + } + + return args; + } + + ret::desc + return_type() const { + return ret::desc{static_cast(s_return_type.r_type)}; + } +}; + +static inline descriptor +lookup_syscall(const long syscall_number) { + const auto* info = ::get_syscall_info(syscall_number, nullptr); + return *reinterpret_cast(info); +} + +static inline descriptor +lookup_syscall(const long syscall_number, + const long argv[MAX_ARGS]) { + const auto* info = ::get_syscall_info(syscall_number, argv); + return *reinterpret_cast(info); +} + +static inline bool +never_returns(const long syscall_number) { + const auto desc = lookup_syscall(syscall_number); + return desc.return_type() == ret::none; +} + +static inline bool +always_returns(const long syscall_number) { + return !never_returns(syscall_number); +} + +static inline bool +may_not_return(const long syscall_number) { + return syscall_number == SYS_execve || + syscall_number == SYS_execveat; +} + + +// information about a syscall +enum class info : int { + unknown = 0x00000000, // no info (reset) + + // syscall origin + internal = 0x00000001, // syscall originates from GekkoFS' internals + external = 0x00000002, // syscall originates from client application + + // syscall target + kernel = 0x00000010, // syscall forwarded to the kernel + hook = 0x00000020, // syscall handled by GekkoFS + + // syscall state + executed = 0x00000100, // syscall has been 
executed + not_executed = 0x00000000, // syscall has not been executed + + // masks + origin_mask = 0x00000003, // mask for syscall's origin information + target_mask = 0x7ffffefc, // mask for syscall's target information + execution_mask = 0x00000100 // mask for syscall's execution state +}; + + +inline constexpr info +operator&(info t1, info t2) { + return info(static_cast(t1) & static_cast(t2)); +} + +inline constexpr info +operator|(info t1, info t2) { + return info(static_cast(t1) | static_cast(t2)); +} + +inline constexpr info +operator^(info t1, info t2) { + return info(static_cast(t1) ^ static_cast(t2)); +} + +inline constexpr info +operator~(info t1) { + return info(~static_cast(t1)); +} + +inline const info& +operator|=(info& t1, info t2) { + return t1 = t1 | t2; +} + +inline const info& +operator&=(info& t1, info t2) { + return t1 = t1 & t2; +} + +inline const info& +operator^=(info& t1, info t2) { + return t1 = t1 ^ t2; +} + + +static const auto constexpr no_info = info::unknown; +static const auto constexpr from_internal_code = info::internal; +static const auto constexpr from_external_code = info::external; +static const auto constexpr to_kernel = info::kernel; +static const auto constexpr to_hook = info::hook; + +static const auto constexpr executed = info::executed; +static const auto constexpr not_executed = info::not_executed; + +static const auto constexpr origin_mask = info::origin_mask; +static const auto constexpr target_mask = info::target_mask; +static const auto constexpr execution_mask = info::execution_mask; + +enum { + hooked = 0x0, + forward_to_kernel = 0x1 +}; + +static constexpr auto +origin(syscall::info info) { + return info & origin_mask; +} + +static constexpr auto +target(syscall::info info) { + return info & target_mask; +} + +static constexpr bool +is_handled_by_kernel(syscall::info info) { + return (info & target_mask) == to_kernel; +} + +static constexpr auto +execution_is_pending(syscall::info info) { + return (info & 
execution_mask) == not_executed; +} + +/* + * error_code - examines a return value from a syscall execution + * and returns an error code if said return value indicates an error. + */ +static inline int +error_code(long result) { + if (result < 0 && result >= -0x1000) + return (int)-result; + + return 0; +} + +} // namespace syscall +} // namespace gkfs + +#endif // GKFS_SYSCALL_HPP diff --git a/include/daemon/env.hpp b/include/daemon/env.hpp new file mode 100644 index 000000000..cb75bf547 --- /dev/null +++ b/include/daemon/env.hpp @@ -0,0 +1,34 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#ifndef GKFS_DAEMON_ENV +#define GKFS_DAEMON_ENV + +#include + +#define ADD_PREFIX(str) DAEMON_ENV_PREFIX str + +/* Environment variables for the GekkoFS daemon */ +namespace gkfs { +namespace env { + +static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); + +} // namespace env +} // namespace gkfs + +#undef ADD_PREFIX + +#endif // GKFS_DAEMON_ENV + + diff --git a/include/global/configure.hpp.in b/include/global/configure.hpp.in index 94051a741..accce2d46 100644 --- a/include/global/configure.hpp.in +++ b/include/global/configure.hpp.in @@ -60,13 +60,14 @@ #define RPC_DIRENTS_BUFF_SIZE (8 * 1024 * 1024) // 8 mega // environment prefixes -#define ENV_PREFIX "GKFS_" +#define CLIENT_ENV_PREFIX "LIBGKFS_" +#define DAEMON_ENV_PREFIX "GKFS_" // Log -#define DEFAULT_PRELOAD_LOG_PATH "/tmp/gkfs_preload.log" +#define DEFAULT_CLIENT_LOG_PATH "/tmp/gkfs_client.log" #define DEFAULT_DAEMON_LOG_PATH "/tmp/gkfs_daemon.log" -#define DEFAULT_PRELOAD_LOG_LEVEL 4 // info +#define DEFAULT_CLIENT_LOG_LEVEL 
"info,errors,critical,mercury" #define DEFAULT_DAEMON_LOG_LEVEL 4 // info #endif //FS_CONFIGURE_H diff --git a/include/global/env_util.hpp b/include/global/env_util.hpp index 70f26c878..993699b20 100644 --- a/include/global/env_util.hpp +++ b/include/global/env_util.hpp @@ -11,16 +11,19 @@ SPDX-License-Identifier: MIT */ -#ifndef GKFS_ENV_UTIL_HPP -#define GKFS_ENV_UTIL_HPP +#ifndef GKFS_COMMON_ENV_UTIL_HPP +#define GKFS_COMMON_ENV_UTIL_HPP #include namespace gkfs { +namespace env { -std::string get_env(const std::string& env_name); -std::string get_env_own(const std::string& env_name); +std::string +get_var(const std::string& name, + const std::string& default_value = ""); -} +} // namespace env +} // namespace gkfs -#endif //IFS_ENV_UTIL_HPP +#endif // GKFS_COMMON_ENV_UTIL_HPP diff --git a/scripts/compile_dep.sh b/scripts/compile_dep.sh index a3293c2ba..3a73a0e07 100755 --- a/scripts/compile_dep.sh +++ b/scripts/compile_dep.sh @@ -1,8 +1,23 @@ #!/bin/bash +mogon1_deps=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept date" +) + +mogon2_deps=( + "zstd" "lz4" "snappy" "bmi" "mercury" "argobots" "margo" "rocksdb" + "syscall_intercept date" +) + +fh2_deps=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept date" +) + usage_short() { echo " -usage: compile_dep.sh [-h] [-n ] [-c ] [-j ] +usage: compile_dep.sh [-h] [-n ] [-c ] [-d ] [-j ] source_path install_path " } @@ -19,13 +34,18 @@ positional arguments: optional arguments: - -h, --help shows this help message and exits + -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for building and installation -n , --na network layer that is used for communication. 
Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + build and install a specific dependency. If unspecified + all dependencies are built and installed. -j , --compilecores number of cores that are used to compile the depdencies defaults to number of available cores @@ -33,6 +53,33 @@ optional arguments: " } + +list_dependencies() { + + echo "Available dependencies: " + + echo -n " Mogon 1: " + for d in "${mogon1_deps[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " Mogon 2: " + for d in "${mogon2_deps[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " fh2: " + for d in "${fh2_deps[@]}" + do + echo -n "$d " + done + echo "" +} + prepare_build_dir() { if [ ! -d "$1/build" ]; then mkdir $1/build @@ -52,6 +99,7 @@ find_cmake() { PATCH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" PATCH_DIR="${PATCH_DIR}/patches" CLUSTER="" +DEPENDENCY="" NA_LAYER="" CORES="" SOURCE="" @@ -72,6 +120,15 @@ case ${key} in CLUSTER="$2" shift # past argument shift # past value + ;; + -d|--dependency) + if [[ -z "$2" ]]; then + echo "Missing argument for -d/--dependency option" + exit 1 + fi + DEPENDENCY="$2" + shift # past argument + shift # past value ;; -j|--compilecores) CORES="$2" @@ -82,6 +139,10 @@ case ${key} in PERFORM_TEST=true shift ;; + -l|--list-dependencies) + list_dependencies + exit + ;; -h|--help) help_msg exit @@ -160,117 +221,155 @@ export LIBRARY_PATH="${LIBRARY_PATH}:${INSTALL}/lib:${INSTALL}/lib64" # Set cluster dependencies first if [[ ( "${CLUSTER}" == "mogon1" ) || ( "${CLUSTER}" == "fh2" ) || ( "${CLUSTER}" == "mogon2" ) ]]; then + # compile zstd - echo "############################################################ Installing: zstd" - CURR=${SOURCE}/zstd/build/cmake + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "zstd" ) ]]; then + echo "############################################################ Installing: zstd" +
CURR=${SOURCE}/zstd/build/cmake + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. + make -j${CORES} + make install + fi + + # build lz4 + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "lz4" ) ]]; then + echo "############################################################ Installing: lz4" + CURR=${SOURCE}/lz4 + cd ${CURR} + make -j${CORES} + make DESTDIR=${INSTALL} PREFIX="" install + fi + + # build snappy + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "snappy" ) ]]; then + echo "############################################################ Installing: snappy" + CURR=${SOURCE}/snappy + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. + make -j${CORES} + make install + fi +fi + +# build bmi +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "bmi" ) ]]; then + if [ "$NA_LAYER" == "bmi" ] || [ "$NA_LAYER" == "all" ]; then + USE_BMI="-DNA_USE_BMI:BOOL=ON" + echo "############################################################ Installing: BMI" + # BMI + CURR=${SOURCE}/bmi + prepare_build_dir ${CURR} + cd ${CURR} + ./prepare + cd ${CURR}/build + CFLAGS="${CFLAGS} -w" ../configure --prefix=${INSTALL} --enable-shared --disable-static --disable-karma --enable-bmi-only --enable-fast --disable-strict + make -j${CORES} + make install + fi +fi + +# build ofi +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "ofi" ) ]]; then + if [ "$NA_LAYER" == "ofi" ] || [ "$NA_LAYER" == "all" ]; then + USE_OFI="-DNA_USE_OFI:BOOL=ON" + # Mogon2 already has libfabric installed in a version that Mercury supports.
+ if [[ ("${CLUSTER}" != "mogon2") ]]; then + echo "############################################################ Installing: LibFabric" + #libfabric + CURR=${SOURCE}/libfabric + prepare_build_dir ${CURR} + cd ${CURR}/build + ../configure --prefix=${INSTALL} --enable-tcp=yes + make -j${CORES} + make install + [ "${PERFORM_TEST}" ] && make check + fi + fi +fi + + +# Mercury +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then + echo "############################################################ Installing: Mercury" + CURR=${SOURCE}/mercury prepare_build_dir ${CURR} cd ${CURR}/build - $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. + $CMAKE \ + -DCMAKE_BUILD_TYPE:STRING=Debug \ + -DBUILD_TESTING:BOOL=ON \ + -DMERCURY_USE_SM_ROUTING:BOOL=OFF \ + -DMERCURY_USE_SELF_FORWARD:BOOL=OFF \ + -DMERCURY_USE_CHECKSUMS:BOOL=OFF \ + -DMERCURY_USE_BOOST_PP:BOOL=ON \ + -DMERCURY_USE_EAGER_BULK:BOOL=ON \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${INSTALL} \ + ${USE_BMI} ${USE_OFI} \ + .. make -j${CORES} make install - echo "############################################################ Installing: lz4" - CURR=${SOURCE}/lz4 - cd ${CURR} - make -j${CORES} - make DESTDIR=${INSTALL} PREFIX="" install - echo "############################################################ Installing: snappy" - CURR=${SOURCE}/snappy +fi + +# Argobots +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "argobots" ) ]]; then + echo "############################################################ Installing: Argobots" + CURR=${SOURCE}/argobots prepare_build_dir ${CURR} + cd ${CURR} + ./autogen.sh cd ${CURR}/build - $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release .. 
+ ../configure --prefix=${INSTALL} --enable-perf-opt --disable-checks make -j${CORES} make install + [ "${PERFORM_TEST}" ] && make check fi -if [ "$NA_LAYER" == "bmi" ] || [ "$NA_LAYER" == "all" ]; then - USE_BMI="-DNA_USE_BMI:BOOL=ON" - echo "############################################################ Installing: BMI" - # BMI - CURR=${SOURCE}/bmi +# Margo +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "margo" ) ]]; then + echo "############################################################ Installing: Margo" + CURR=${SOURCE}/margo prepare_build_dir ${CURR} cd ${CURR} - ./prepare + ./prepare.sh cd ${CURR}/build - CFLAGS="${CFLAGS} -w" ../configure --prefix=${INSTALL} --enable-shared --disable-static --disable-karma --enable-bmi-only --enable-fast --disable-strict + ../configure --prefix=${INSTALL} PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig CFLAGS="${CFLAGS} -Wall -O3" make -j${CORES} make install + [ "${PERFORM_TEST}" ] && make check fi -if [ "$NA_LAYER" == "ofi" ] || [ "$NA_LAYER" == "all" ]; then - USE_OFI="-DNA_USE_OFI:BOOL=ON" - # Mogon2 already has libfabric installed in a version that Mercury supports. 
- if [[ ("${CLUSTER}" != "mogon2") ]]; then - echo "############################################################ Installing: LibFabric" - #libfabric - CURR=${SOURCE}/libfabric - prepare_build_dir ${CURR} - cd ${CURR}/build - ../configure --prefix=${INSTALL} --enable-tcp=yes - make -j${CORES} - make install - [ "${PERFORM_TEST}" ] && make check - fi +# Rocksdb +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "rocksdb" ) ]]; then + echo "############################################################ Installing: Rocksdb" + CURR=${SOURCE}/rocksdb + cd ${CURR} + make clean + USE_RTTI=1 make -j${CORES} static_lib + INSTALL_PATH=${INSTALL} make install fi -echo "############################################################ Installing: Mercury" +# syscall_intercept +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "syscall_intercept" ) ]]; then + echo "############################################################ Installing: Syscall_intercept" + CURR=${SOURCE}/syscall_intercept + prepare_build_dir ${CURR} + cd ${CURR}/build + CC="" CXX="" $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Debug -DBUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTS:BOOL=OFF .. + make install +fi -# Mercury -CURR=${SOURCE}/mercury -prepare_build_dir ${CURR} -cd ${CURR}/build -$CMAKE \ - -DCMAKE_BUILD_TYPE:STRING=Release \ - -DBUILD_TESTING:BOOL=ON \ - -DMERCURY_USE_SM_ROUTING:BOOL=ON \ - -DMERCURY_USE_SELF_FORWARD:BOOL=ON \ - -DMERCURY_USE_CHECKSUMS:BOOL=OFF \ - -DMERCURY_USE_BOOST_PP:BOOL=ON \ - -DMERCURY_USE_EAGER_BULK:BOOL=ON \ - -DBUILD_SHARED_LIBS:BOOL=ON \ - -DCMAKE_INSTALL_PREFIX=${INSTALL} \ - ${USE_BMI} ${USE_OFI} \ - ..
-make -j${CORES} -make install - -echo "############################################################ Installing: Argobots" +# date +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "date" ) ]]; then + echo "############################################################ Installing: date" + CURR=${SOURCE}/date + prepare_build_dir ${CURR} + cd ${CURR}/build + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_CXX_STANDARD:STRING=14 -DUSE_SYSTEM_TZ_DB:BOOL=ON -DBUILD_SHARED_LIBS:BOOL=ON .. + make install +fi -# Argobots -CURR=${SOURCE}/argobots -prepare_build_dir ${CURR} -cd ${CURR} -./autogen.sh -cd ${CURR}/build -../configure --prefix=${INSTALL} --enable-perf-opt --disable-checks -make -j${CORES} -make install -[ "${PERFORM_TEST}" ] && make check - -echo "############################################################ Installing: Margo" -# Margo -CURR=${SOURCE}/margo -prepare_build_dir ${CURR} -cd ${CURR} -./prepare.sh -cd ${CURR}/build -../configure --prefix=${INSTALL} PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig CFLAGS="${CFLAGS} -Wall -O3" -make -j${CORES} -make install -[ "${PERFORM_TEST}" ] && make check - -echo "############################################################ Installing: Rocksdb" -# Rocksdb -CURR=${SOURCE}/rocksdb -cd ${CURR} -make clean -USE_RTTI=1 make -j${CORES} static_lib -INSTALL_PATH=${INSTALL} make install - -echo "############################################################ Installing: Syscall_intercept" -CURR=${SOURCE}/syscall_intercept -prepare_build_dir ${CURR} -cd ${CURR}/build -$CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Release -DBUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTS:BOOK=OFF .. 
-make install echo "Done" diff --git a/scripts/dl_dep.sh b/scripts/dl_dep.sh index 6700ca07e..036ce615c 100755 --- a/scripts/dl_dep.sh +++ b/scripts/dl_dep.sh @@ -198,6 +198,8 @@ clonedeps "margo" "https://xgitlab.cels.anl.gov/sds/margo.git" "6ed94e4f3a4d526b wgetdeps "rocksdb" "https://github.com/facebook/rocksdb/archive/v6.1.2.tar.gz" & # get syscall_intercept clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" & +# get date +clonedeps "date" "https://github.com/HowardHinnant/date.git" "e7e1482087f58913b80a20b04d5c58d9d6d90155" # Wait for all download to be completed wait diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index e053728c0..5ea177e7f 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -14,6 +14,8 @@ set(PRELOAD_SRC rpc/ld_rpc_metadentry.cpp ../global/rpc/rpc_utils.cpp ../global/path_util.cpp + logging.cpp + syscalls/detail/syscall_info.c ) set(PRELOAD_HEADERS ../../include/global/configure.hpp @@ -25,7 +27,6 @@ set(PRELOAD_HEADERS ../../include/client/preload_context.hpp ../../include/client/adafs_functions.hpp ../../include/client/intercept.hpp - ../../include/client/syscall_names.hpp ../../include/client/hooks.hpp ../../include/client/open_file_map.hpp ../../include/client/open_dir.hpp @@ -36,6 +37,15 @@ set(PRELOAD_HEADERS ../../include/client/rpc/ld_rpc_management.hpp ../../include/client/rpc/ld_rpc_data_ws.hpp ../../include/client/rpc/ld_rpc_metadentry.hpp + ../../include/client/logging.hpp + ../../include/client/env.hpp + ../../include/client/make_array.hpp + ../../include/client/syscalls/args.hpp + ../../include/client/syscalls/decoder.hpp + ../../include/client/syscalls/errno.hpp + ../../include/client/syscalls/rets.hpp + ../../include/client/syscalls/syscall.hpp + ../../include/client/syscalls/detail/syscall_info.h ) add_library(gkfs_intercept SHARED ${PRELOAD_SRC} ${PRELOAD_HEADERS}) @@ -44,15 +54,16 @@ 
target_link_libraries(gkfs_intercept # internal metadata distributor - log_util env_util # external Syscall_intercept::Syscall_intercept dl mercury hermes + fmt Boost::boost # needed for tokenizer header Threads::Threads + Date::TZ ) target_include_directories(gkfs_intercept diff --git a/src/client/adafs_functions.cpp b/src/client/adafs_functions.cpp index 3334e8531..f006442c3 100644 --- a/src/client/adafs_functions.cpp +++ b/src/client/adafs_functions.cpp @@ -18,6 +18,7 @@ #include #include #include "client/preload_util.hpp" +#include #include #include #include @@ -48,13 +49,13 @@ using namespace std; int adafs_open(const std::string& path, mode_t mode, int flags) { if(flags & O_PATH){ - CTX->log()->error("{}() `O_PATH` flag is not supported", __func__); + LOG(ERROR, "`O_PATH` flag is not supported"); errno = ENOTSUP; return -1; } if(flags & O_APPEND){ - CTX->log()->error("{}() `O_APPEND` flag is not supported", __func__); + LOG(ERROR, "`O_APPEND` flag is not supported"); errno = ENOTSUP; return -1; } @@ -65,7 +66,7 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { if(errno == ENOENT) { exists = false; } else { - CTX->log()->error("{}() error while retriving stat to file", __func__); + LOG(ERROR, "Error while retriving stat to file"); return -1; } } @@ -81,14 +82,14 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { assert(flags & O_CREAT); if(flags & O_DIRECTORY){ - CTX->log()->error("{}() O_DIRECTORY use with O_CREAT. NOT SUPPORTED", __func__); + LOG(ERROR, "O_DIRECTORY use with O_CREAT. NOT SUPPORTED"); errno = ENOTSUP; return -1; } // no access check required here. If one is using our FS they have the permissions. 
if(adafs_mk_node(path, mode | S_IFREG)) { - CTX->log()->error("{}() error creating non-existent file", __func__); + LOG(ERROR, "Error creating non-existent file"); return -1; } } else { @@ -103,7 +104,7 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { #ifdef HAS_SYMLINKS if (md->is_link()) { if (flags & O_NOFOLLOW) { - CTX->log()->warn("{}() symlink found and O_NOFOLLOW flag was specified", __func__); + LOG(WARNING, "Symlink found and O_NOFOLLOW flag was specified"); errno = ELOOP; return -1; } @@ -121,7 +122,7 @@ int adafs_open(const std::string& path, mode_t mode, int flags) { if( (flags & O_TRUNC) && ((flags & O_RDWR) || (flags & O_WRONLY)) ) { if(adafs_truncate(path, md->size(), 0)) { - CTX->log()->error("{}() error truncating file", __func__); + LOG(ERROR, "Error truncating file"); return -1; } } @@ -144,11 +145,11 @@ int adafs_mk_node(const std::string& path, mode_t mode) { case S_IFBLK: case S_IFIFO: case S_IFSOCK: - CTX->log()->warn("{}() unsupported node type", __func__); + LOG(WARNING, "Unsupported node type"); errno = ENOTSUP; return -1; default: - CTX->log()->warn("{}() unrecognized node type", __func__); + LOG(WARNING, "Unrecognized node type"); errno = EINVAL; return -1; } @@ -156,12 +157,12 @@ int adafs_mk_node(const std::string& path, mode_t mode) { auto p_comp = dirname(path); auto md = adafs_metadata(p_comp); if (!md) { - CTX->log()->debug("{}() parent component does not exists: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component does not exist: '{}'", p_comp); errno = ENOENT; return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() parent component is not a direcotory: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component is not a directory: '{}'", p_comp); errno = ENOTDIR; return -1; } @@ -222,7 +223,6 @@ std::shared_ptr adafs_metadata(const string& path, bool follow_links) } int adafs_statfs(struct statfs* buf) { - CTX->log()->trace("{}() called", __func__); auto blk_stat = rpc_send::chunk_stat(); 
buf->f_type = 0; buf->f_bsize = blk_stat.chunk_size; @@ -264,15 +264,12 @@ off_t adafs_lseek(unsigned int fd, off_t offset, unsigned int whence) { off_t adafs_lseek(shared_ptr adafs_fd, off_t offset, unsigned int whence) { switch (whence) { case SEEK_SET: - CTX->log()->debug("{}() whence is SEEK_SET", __func__); adafs_fd->pos(offset); break; case SEEK_CUR: - CTX->log()->debug("{}() whence is SEEK_CUR", __func__); adafs_fd->pos(adafs_fd->pos() + offset); break; case SEEK_END: { - CTX->log()->debug("{}() whence is SEEK_END", __func__); off64_t file_size; auto err = rpc_send::get_metadentry_size(adafs_fd->path(), file_size); if (err < 0) { @@ -283,17 +280,17 @@ off_t adafs_lseek(shared_ptr adafs_fd, off_t offset, unsigned int when break; } case SEEK_DATA: - CTX->log()->warn("{}() SEEK_DATA whence is not supported", __func__); + LOG(WARNING, "SEEK_DATA whence is not supported"); // We do not support this whence yet errno = EINVAL; return -1; case SEEK_HOLE: - CTX->log()->warn("{}() SEEK_HOLE whence is not supported", __func__); + LOG(WARNING, "SEEK_HOLE whence is not supported"); // We do not support this whence yet errno = EINVAL; return -1; default: - CTX->log()->warn("{}() unknown whence {}", __func__, whence); + LOG(WARNING, "Unknown whence value {:#x}", whence); errno = EINVAL; return -1; } @@ -309,12 +306,12 @@ int adafs_truncate(const std::string& path, off_t old_size, off_t new_size) { } if (rpc_send::decr_size(path, new_size)) { - CTX->log()->debug("{}() failed to decrease size", __func__); + LOG(DEBUG, "Failed to decrease size"); return -1; } if(rpc_send::trunc_data(path, old_size, new_size)){ - CTX->log()->debug("{}() failed to truncate data", __func__); + LOG(DEBUG, "Failed to truncate data"); return -1; } return 0; @@ -330,7 +327,7 @@ int adafs_truncate(const std::string& path, off_t length) { * length increased. 
*/ if(length < 0) { - CTX->log()->debug("{}() length is negative: {}", __func__, length); + LOG(DEBUG, "Length is negative: {}", length); errno = EINVAL; return -1; } @@ -341,8 +338,7 @@ int adafs_truncate(const std::string& path, off_t length) { } auto size = md->size(); if(static_cast(length) > size) { - CTX->log()->debug("{}() length is greater then file size: {} > {}", - __func__, length, size); + LOG(DEBUG, "Length is greater than file size: {} > {}", length, size); errno = EINVAL; return -1; } @@ -360,24 +356,23 @@ int adafs_dup2(const int oldfd, const int newfd) { ssize_t adafs_pwrite(std::shared_ptr file, const char * buf, size_t count, off64_t offset) { if (file->type() != FileType::regular) { assert(file->type() == FileType::directory); - CTX->log()->warn("{}() cannot read from directory", __func__); + LOG(WARNING, "Cannot write to directory"); errno = EISDIR; return -1; } auto path = make_shared(file->path()); - CTX->log()->trace("{}() count: {}, offset: {}", __func__, count, offset); auto append_flag = file->get_flag(OpenFile_flags::append); ssize_t ret = 0; long updated_size = 0; ret = rpc_send::update_metadentry_size(*path, count, offset, append_flag, updated_size); if (ret != 0) { - CTX->log()->error("{}() update_metadentry_size failed with ret {}", __func__, ret); + LOG(ERROR, "update_metadentry_size() failed with ret {}", ret); return ret; // ERR } ret = rpc_send::write(*path, buf, append_flag, offset, count, updated_size); if (ret < 0) { - CTX->log()->warn("{}() rpc_send::write failed with ret {}", __func__, ret); + LOG(WARNING, "rpc_send::write() failed with ret {}", ret); } return ret; // return written size or -1 as error } @@ -406,8 +401,6 @@ ssize_t adafs_write(int fd, const void * buf, size_t count) { } ssize_t adafs_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset) { - CTX->log()->trace("{}() called with fd {}, op num {}, offset {}", - __func__, fd, iovcnt, offset); auto file = CTX->file_map()->get(fd); auto pos = offset;
// keep truck of current position @@ -438,8 +431,7 @@ ssize_t adafs_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset) } ssize_t adafs_writev(int fd, const struct iovec * iov, int iovcnt) { - CTX->log()->trace("{}() called with fd {}, ops num {}", - __func__, fd, iovcnt); + auto adafs_fd = CTX->file_map()->get(fd); auto pos = adafs_fd->pos(); // retrieve the current offset auto ret = adafs_pwritev(fd, iov, iovcnt, pos); @@ -454,18 +446,18 @@ ssize_t adafs_writev(int fd, const struct iovec * iov, int iovcnt) { ssize_t adafs_pread(std::shared_ptr file, char * buf, size_t count, off64_t offset) { if (file->type() != FileType::regular) { assert(file->type() == FileType::directory); - CTX->log()->warn("{}() cannot read from directory", __func__); + LOG(WARNING, "Cannot read from directory"); errno = EISDIR; return -1; } - CTX->log()->trace("{}() count: {}, offset: {}", __func__, count, offset); + // Zeroing buffer before read is only relevant for sparse files. Otherwise sparse regions contain invalid data. 
#if defined(ZERO_BUFFER_BEFORE_READ) memset(buf, 0, sizeof(char)*count); #endif auto ret = rpc_send::read(file->path(), buf, offset, count); if (ret < 0) { - CTX->log()->warn("{}() rpc_send::read failed with ret {}", __func__, ret); + LOG(WARNING, "rpc_send::read() failed with ret {}", ret); } // XXX check that we don't try to read past end of the file return ret; // return read size or -1 as error @@ -494,7 +486,7 @@ int adafs_opendir(const std::string& path) { return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() path is not a directory", __func__); + LOG(DEBUG, "Path is not a directory"); errno = ENOTDIR; return -1; } @@ -507,12 +499,12 @@ int adafs_opendir(const std::string& path) { int adafs_rmdir(const std::string& path) { auto md = adafs_metadata(path); if (!md) { - CTX->log()->debug("{}() path does not exists: '{}'", __func__, path); + LOG(DEBUG, "Path '{}' does not exist: ", path); errno = ENOENT; return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() path is not a directory", __func__); + LOG(DEBUG, "Path '{}' is not a directory", path); errno = ENOTDIR; return -1; } @@ -530,7 +522,7 @@ int adafs_rmdir(const std::string& path) { int getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count) { - CTX->log()->trace("{}() called on fd: {}, count {}", __func__, fd, count); + auto open_dir = CTX->file_map()->get_dir(fd); if(open_dir == nullptr){ //Cast did not succeeded: open_file is a regular file @@ -563,7 +555,7 @@ int getdents(unsigned int fd, *(reinterpret_cast(current_dirp) + total_size - 1) = ((de.type() == FileType::regular)? 
DT_REG : DT_DIR); - CTX->log()->trace("{}() name {}: {}", __func__, pos, de.name()); + LOG(DEBUG, "name {}: {}", pos, de.name()); std::strcpy(&(current_dirp->d_name[0]), de.name().c_str()); ++pos; current_dirp->d_off = pos; @@ -582,7 +574,7 @@ int getdents(unsigned int fd, int getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count) { - CTX->log()->trace("{}() called on fd: {}, count {}", __func__, fd, count); + auto open_dir = CTX->file_map()->get_dir(fd); if(open_dir == nullptr){ //Cast did not succeeded: open_file is a regular file @@ -615,7 +607,7 @@ int getdents64(unsigned int fd, - CTX->log()->trace("{}() name {}: {}", __func__, pos, de.name()); + LOG(DEBUG, "name {}: {}", pos, de.name()); std::strcpy(&(current_dirp->d_name[0]), de.name().c_str()); ++pos; current_dirp->d_off = pos; @@ -645,7 +637,7 @@ int adafs_mk_symlink(const std::string& path, const std::string& target_path) { auto trg_mode = target_md->mode(); if (!(S_ISREG(trg_mode) || S_ISLNK(trg_mode))) { assert(S_ISDIR(trg_mode)); - CTX->log()->debug("{}() target path is a directory. Not supported", __func__); + LOG(DEBUG, "Target path is a directory. 
Not supported"); errno = ENOTSUP; return -1; } @@ -654,19 +646,19 @@ int adafs_mk_symlink(const std::string& path, const std::string& target_path) { auto p_comp = dirname(path); auto md = adafs_metadata(p_comp, false); if (md == nullptr) { - CTX->log()->debug("{}() parent component does not exist: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component does not exist: '{}'", p_comp); errno = ENOENT; return -1; } if (!S_ISDIR(md->mode())) { - CTX->log()->debug("{}() parent component is not a directory: '{}'", __func__, p_comp); + LOG(DEBUG, "Parent component is not a directory: '{}'", p_comp); errno = ENOTDIR; return -1; } auto link_md = adafs_metadata(path, false); if (link_md != nullptr) { - CTX->log()->debug("{}() Link exists: '{}'", __func__, p_comp); + LOG(DEBUG, "Link exists: '{}'", path); errno = EEXIST; return -1; } @@ -678,17 +670,17 @@ int adafs_readlink(const std::string& path, char *buf, int bufsize) { init_ld_env_if_needed(); auto md = adafs_metadata(path, false); if (md == nullptr) { - CTX->log()->debug("{}() named link doesn't exists", __func__); + LOG(DEBUG, "Named link doesn't exist"); return -1; } if (!(md->is_link())) { - CTX->log()->debug("{}() The named file is not a symbolic link", __func__); + LOG(DEBUG, "The named file is not a symbolic link"); errno = EINVAL; return -1; } int path_size = md->target_path().size() + CTX->mountdir().size(); if (path_size >= bufsize) { - CTX->log()->warn("{}() destination buffer size is to short: {} < {}, {} ", __func__, bufsize, path_size, md->target_path()); + LOG(WARNING, "Destination buffer size is too short: {} < {}, {} ", bufsize, path_size, md->target_path()); errno = ENAMETOOLONG; return -1; } diff --git a/src/client/hooks.cpp b/src/client/hooks.cpp index 40f5d254f..2f30cb70f 100644 --- a/src/client/hooks.cpp +++ b/src/client/hooks.cpp @@ -13,6 +13,8 @@ #include "client/hooks.hpp" #include "client/preload.hpp" +#include "client/logging.hpp" + #include "client/adafs_functions.hpp" #include
"client/resolve.hpp" #include "client/open_dir.hpp" @@ -30,8 +32,8 @@ static inline int with_errno(int ret) { int hook_openat(int dirfd, const char *cpath, int flags, mode_t mode) { - CTX->log()->trace("{}() called with fd: {}, path: {}, flags: {}, mode: {}", - __func__, dirfd, cpath, flags, mode); + LOG(DEBUG, "{}() called with fd: {}, path: \"{}\", flags: {}, mode: {}", + __func__, dirfd, cpath, flags, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -49,13 +51,15 @@ int hook_openat(int dirfd, const char *cpath, int flags, mode_t mode) { return with_errno(adafs_open(resolved, mode, flags)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } int hook_close(int fd) { - CTX->log()->trace("{}() called with fd {}", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}", __func__, fd); + if(CTX->file_map()->exist(fd)) { // No call to the daemon is required CTX->file_map()->remove(fd); @@ -72,7 +76,10 @@ int hook_close(int fd) { } int hook_stat(const char* path, struct stat* buf) { - CTX->log()->trace("{}() called with path '{}'", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", + __func__, path, fmt::ptr(buf)); + std::string rel_path; if (CTX->relativize_path(path, rel_path, false)) { return with_errno(adafs_stat(rel_path, buf)); @@ -81,7 +88,10 @@ int hook_stat(const char* path, struct stat* buf) { } int hook_lstat(const char* path, struct stat* buf) { - CTX->log()->trace("{}() called with path '{}'", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", + __func__, path, fmt::ptr(buf)); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { return with_errno(adafs_stat(rel_path, buf)); @@ -90,7 +100,10 @@ int hook_lstat(const char* path, struct stat* buf) { } int hook_fstat(unsigned int fd, struct stat* buf) { - CTX->log()->trace("{}() called with fd
'{}'", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}", + __func__, fd, fmt::ptr(buf)); + if (CTX->file_map()->exist(fd)) { auto path = CTX->file_map()->get(fd)->path(); return with_errno(adafs_stat(path, buf)); @@ -99,10 +112,12 @@ int hook_fstat(unsigned int fd, struct stat* buf) { } int hook_fstatat(int dirfd, const char * cpath, struct stat * buf, int flags) { - CTX->log()->trace("{}() called with path '{}' and fd {}", __func__, cpath, dirfd); + + LOG(DEBUG, "{}() called with path: \"{}\", fd: {}, buf: {}, flags: {}", + __func__, cpath, dirfd, fmt::ptr(buf), flags); if(flags & AT_EMPTY_PATH) { - CTX->log()->error("{}() AT_EMPTY_PATH flag not supported", __func__); + LOG(ERROR, "{}() AT_EMPTY_PATH flag not supported", __func__); return -ENOTSUP; } @@ -122,13 +137,16 @@ int hook_fstatat(int dirfd, const char * cpath, struct stat * buf, int flags) { return with_errno(adafs_stat(resolved, buf)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } int hook_read(unsigned int fd, void* buf, size_t count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count: {}", + __func__, fd, fmt::ptr(buf), count); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_read(fd, buf, count)); } @@ -136,8 +154,10 @@ int hook_read(unsigned int fd, void* buf, size_t count) { } int hook_pread(unsigned int fd, char * buf, size_t count, loff_t pos) { - CTX->log()->trace("{}() called with fd {}, count {}, pos {}", - __func__, fd, count, pos); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count: {}, pos: {}", + __func__, fd, fmt::ptr(buf), count, pos); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_pread_ws(fd, buf, count, pos)); } @@ -146,7 +166,10 @@ int hook_pread(unsigned int fd, char * buf, size_t count, loff_t pos) { } int hook_write(unsigned int fd, const char
* buf, size_t count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count: {}", + __func__, fd, fmt::ptr(buf), count); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_write(fd, buf, count)); } @@ -154,8 +177,10 @@ int hook_write(unsigned int fd, const char * buf, size_t count) { } int hook_pwrite(unsigned int fd, const char * buf, size_t count, loff_t pos) { - CTX->log()->trace("{}() called with fd {}, count {}, pos {}", - __func__, fd, count, pos); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}, count: {}, pos: {}", + __func__, fd, fmt::ptr(buf), count, pos); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_pwrite_ws(fd, buf, count, pos)); } @@ -164,7 +189,10 @@ int hook_pwrite(unsigned int fd, const char * buf, size_t count, loff_t pos) { } int hook_writev(unsigned long fd, const struct iovec * iov, unsigned long iovcnt) { - CTX->log()->trace("{}() called with fd {}, ops_num {}", __func__, fd, iovcnt); + + LOG(DEBUG, "{}() called with fd: {}, iov: {}, iovcnt: {}", + __func__, fd, fmt::ptr(iov), iovcnt); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_writev(fd, iov, iovcnt)); } @@ -173,21 +201,25 @@ int hook_writev(unsigned long fd, const struct iovec * iov, unsigned long iovcnt int hook_pwritev(unsigned long fd, const struct iovec * iov, unsigned long iovcnt, unsigned long pos_l, unsigned long pos_h) { - CTX->log()->trace("{}() called with fd {}, ops_num {}, low position {}," - "high postion {}", __func__, fd, iovcnt, pos_l, pos_h); + + LOG(DEBUG, "{}() called with fd: {}, iov: {}, iovcnt: {}, " + "pos_l: {}, " "pos_h: {}", + __func__, fd, fmt::ptr(iov), iovcnt, pos_l, pos_h); + if (CTX->file_map()->exist(fd)) { - CTX->log()->warn("{}() Not supported", __func__); + LOG(WARNING, "{}() Not supported", __func__); return -ENOTSUP; } return syscall_no_intercept(SYS_pwritev, fd, iov, iovcnt); } int hook_unlinkat(int dirfd, const char * cpath, int
flags) { - CTX->log()->trace("{}() called with path '{}' dirfd {}, flags {}", - __func__, cpath, dirfd, flags); + + LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", flags: {}", + __func__, dirfd, cpath, flags); if ((flags & ~AT_REMOVEDIR) != 0) { - CTX->log()->error("{}() Flags unknown: {}", __func__, flags); + LOG(ERROR, "{}() Flags unknown: {}", __func__, flags); return -EINVAL; } @@ -211,18 +243,19 @@ int hook_unlinkat(int dirfd, const char * cpath, int flags) { } default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } int hook_symlinkat(const char * oldname, int newdfd, const char * newname) { - CTX->log()->trace("{}() called with oldname '{}', new fd {}, new name '{}'", - __func__, oldname, newdfd, newname); + + LOG(DEBUG, "{}() called with oldname: \"{}\", newfd: {}, newname: \"{}\"", + __func__, oldname, newdfd, newname); std::string oldname_resolved; if (CTX->relativize_path(oldname, oldname_resolved)) { - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; } @@ -239,18 +272,21 @@ int hook_symlinkat(const char * oldname, int newdfd, const char * newname) { return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } int hook_access(const char* path, int mask) { - CTX->log()->trace("{}() called path '{}', mask {}", __func__, path, mask); + + LOG(DEBUG, "{}() called path: \"{}\", mask: {}", + __func__, path, mask); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { auto ret = adafs_access(rel_path, mask); @@ -263,8 +299,9 @@ int hook_access(const char* path, int mask) { } int
hook_faccessat(int dirfd, const char * cpath, int mode) { - CTX->log()->trace("{}() called with path '{}' dirfd {}, mode {}", - __func__, cpath, dirfd, mode); + + LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", mode: {}", + __func__, dirfd, cpath, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -282,13 +319,16 @@ int hook_faccessat(int dirfd, const char * cpath, int mode) { return with_errno(adafs_access(resolved, mode)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } } off_t hook_lseek(unsigned int fd, off_t offset, unsigned int whence) { - CTX->log()->trace("{}() called with fd {}, offset {}, whence {}", __func__, fd, offset, whence); + + LOG(DEBUG, "{}() called with fd: {}, offset: {}, whence: {}", + __func__, fd, offset, whence); + if (CTX->file_map()->exist(fd)) { auto off_ret = adafs_lseek(fd, static_cast(offset), whence); if (off_ret > std::numeric_limits::max()) { @@ -296,14 +336,17 @@ off_t hook_lseek(unsigned int fd, off_t offset, unsigned int whence) { } else if(off_ret < 0) { return -errno; } - CTX->log()->trace("{}() returning {}", __func__, off_ret); + LOG(DEBUG, "{}() returning {}", __func__, off_ret); return off_ret; } return syscall_no_intercept(SYS_lseek, fd, offset, whence); } int hook_truncate(const char* path, long length) { - CTX->log()->trace("{}() called with path: {}, offset: {}", __func__, path, length); + + LOG(DEBUG, "{}() called with path: {}, offset: {}", + __func__, path, length); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { return with_errno(adafs_truncate(rel_path, length)); @@ -312,7 +355,10 @@ int hook_truncate(const char* path, long length) { } int hook_ftruncate(unsigned int fd, unsigned long length) { - CTX->log()->trace("{}() called [fd: {}, offset: {}]", __func__, fd, length); + + LOG(DEBUG, "{}() called with fd: {}, offset: {}", + __func__,
fd, length); + if (CTX->file_map()->exist(fd)) { auto path = CTX->file_map()->get(fd)->path(); return with_errno(adafs_truncate(path, length)); @@ -321,7 +367,10 @@ int hook_ftruncate(unsigned int fd, unsigned long length) { } int hook_dup(unsigned int fd) { - CTX->log()->trace("{}() called with oldfd {}", __func__, fd); + + LOG(DEBUG, "{}() called with oldfd: {}", + __func__, fd); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_dup(fd)); } @@ -329,7 +378,10 @@ int hook_dup(unsigned int fd) { } int hook_dup2(unsigned int oldfd, unsigned int newfd) { - CTX->log()->trace("{}() called with fd {} newfd {}", __func__, oldfd, newfd); + + LOG(DEBUG, "{}() called with oldfd: {}, newfd: {}", + __func__, oldfd, newfd); + if (CTX->file_map()->exist(oldfd)) { return with_errno(adafs_dup2(oldfd, newfd)); } @@ -337,17 +389,24 @@ int hook_dup2(unsigned int oldfd, unsigned int newfd) { } int hook_dup3(unsigned int oldfd, unsigned int newfd, int flags) { + + LOG(DEBUG, "{}() called with oldfd: {}, newfd: {}, flags: {}", + __func__, oldfd, newfd, flags); + if (CTX->file_map()->exist(oldfd)) { // TODO implement O_CLOEXEC flag first which is used with fcntl(2) // It is in glibc since kernel 2.9. 
So maybe not that important :) - CTX->log()->warn("{}() Not supported", __func__); + LOG(WARNING, "{}() Not supported", __func__); return -ENOTSUP; } return syscall_no_intercept(SYS_dup3, oldfd, newfd, flags); } int hook_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, dirp: {}, count: {}", + __func__, fd, fmt::ptr(dirp), count); + if (CTX->file_map()->exist(fd)) { return with_errno(getdents(fd, dirp, count)); } @@ -356,7 +415,10 @@ int hook_getdents(unsigned int fd, struct linux_dirent *dirp, unsigned int count int hook_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int count) { - CTX->log()->trace("{}() called with fd {}, count {}", __func__, fd, count); + + LOG(DEBUG, "{}() called with fd: {}, dirp: {}, count: {}", + __func__, fd, fmt::ptr(dirp), count); + if (CTX->file_map()->exist(fd)) { return with_errno(getdents64(fd, dirp, count)); } @@ -365,8 +427,9 @@ int hook_getdents64(unsigned int fd, struct linux_dirent64 *dirp, unsigned int c int hook_mkdirat(int dirfd, const char * cpath, mode_t mode) { - CTX->log()->trace("{}() called with fd: {}, path: {}, mode: {}", - __func__, dirfd, cpath, mode); + + LOG(DEBUG, "{}() called with dirfd: {}, path: \"{}\", mode: {}", + __func__, dirfd, cpath, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -384,13 +447,15 @@ int hook_mkdirat(int dirfd, const char * cpath, mode_t mode) { return with_errno(adafs_mk_node(resolved, mode | S_IFDIR)); default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_fchmodat(int dirfd, const char * cpath, mode_t mode) { - CTX->log()->trace("{}() called dirfd {}, path '{}', mode {}", __func__, dirfd, cpath, mode); + + LOG(DEBUG, "{}() called dirfd: {}, path: \"{}\", mode: {}", + 
__func__, dirfd, cpath, mode); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved); @@ -405,37 +470,43 @@ int hook_fchmodat(int dirfd, const char * cpath, mode_t mode) { return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_fchmod(unsigned int fd, mode_t mode) { - CTX->log()->trace("{}() called with fd {}, mode {}", __func__, fd, mode); + + LOG(DEBUG, "{}() called with fd: {}, mode: {}", + __func__, fd, mode); + if (CTX->file_map()->exist(fd)) { - CTX->log()->warn("{}() operation not supported", __func__); + LOG(WARNING, "{}() operation not supported", __func__); return -ENOTSUP; } return syscall_no_intercept(SYS_fchmod, fd, mode); } int hook_chdir(const char * path) { - CTX->log()->trace("{}() called with path '{}'", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\"", + __func__, path); + std::string rel_path; bool internal = CTX->relativize_path(path, rel_path); if (internal) { //path falls in our namespace auto md = adafs_metadata(rel_path); if (md == nullptr) { - CTX->log()->error("{}() path does not exists", __func__); + LOG(ERROR, "{}() path does not exists", __func__); return -ENOENT; } if(!S_ISDIR(md->mode())) { - CTX->log()->error("{}() path is not a directory", __func__); + LOG(ERROR, "{}() path is not a directory", __func__); return -ENOTDIR; } //TODO get complete path from relativize_path instead of @@ -455,12 +526,15 @@ int hook_chdir(const char * path) { } int hook_fchdir(unsigned int fd) { - CTX->log()->trace("{}() called with fd {}", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}", + __func__, fd); + if (CTX->file_map()->exist(fd)) { auto open_dir = CTX->file_map()->get_dir(fd); if (open_dir == 
nullptr) { //Cast did not succeeded: open_file is a regular file - CTX->log()->error("{}() file descriptor refers to a normal file: '{}'", + LOG(ERROR, "{}() file descriptor refers to a normal file: '{}'", __func__, open_dir->path()); return -EBADF; } @@ -489,9 +563,12 @@ int hook_fchdir(unsigned int fd) { } int hook_getcwd(char * buf, unsigned long size) { - CTX->log()->trace("{}() called with size {}", __func__, size); + + LOG(DEBUG, "{}() called with buf: {}, size: {}", + __func__, fmt::ptr(buf), size); + if(CTX->cwd().size() + 1 > size) { - CTX->log()->error("{}() buffer too small to host current working dir", __func__); + LOG(ERROR, "{}() buffer too small to host current working dir", __func__); return -ERANGE; } @@ -500,8 +577,9 @@ int hook_getcwd(char * buf, unsigned long size) { } int hook_readlinkat(int dirfd, const char * cpath, char * buf, int bufsiz) { - CTX->log()->trace("{}() called with path '{}' dirfd {}, bufsize {}", - __func__, cpath, dirfd, bufsiz); + + LOG(DEBUG, "{}() called with dirfd: {}, path \"{}\", buf: {}, bufsize: {}", + __func__, dirfd, cpath, fmt::ptr(buf), bufsiz); std::string resolved; auto rstatus = CTX->relativize_fd_path(dirfd, cpath, resolved, false); @@ -516,17 +594,20 @@ int hook_readlinkat(int dirfd, const char * cpath, char * buf, int bufsiz) { return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() not supported", __func__); + LOG(WARNING, "{}() not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown: {}", __func__); + LOG(ERROR, "{}() relativize status unknown: {}", __func__); return -EINVAL; } } int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { - CTX->log()->trace("{}() called with fd {}, cmd {}, arg {}", __func__, fd, cmd, arg); + + LOG(DEBUG, "{}() called with fd: {}, cmd: {}, arg: {}", + __func__, fd, cmd, arg); + if (!CTX->file_map()->exist(fd)) { return syscall_no_intercept(SYS_fcntl, fd, cmd, arg); } @@ -534,11 +615,11 @@ int 
hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { switch (cmd) { case F_DUPFD: - CTX->log()->trace("{}() F_DUPFD on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_DUPFD on fd {}", __func__, fd); return with_errno(adafs_dup(fd)); case F_DUPFD_CLOEXEC: - CTX->log()->trace("{}() F_DUPFD_CLOEXEC on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_DUPFD_CLOEXEC on fd {}", __func__, fd); ret = adafs_dup(fd); if(ret == -1) { return -errno; @@ -547,7 +628,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { return ret; case F_GETFD: - CTX->log()->trace("{}() F_GETFD on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_GETFD on fd {}", __func__, fd); if(CTX->file_map()->get(fd) ->get_flag(OpenFile_flags::cloexec)) { return FD_CLOEXEC; @@ -555,7 +636,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { return 0; case F_GETFL: - CTX->log()->trace("{}() F_GETFL on fd {}", __func__, fd); + LOG(DEBUG, "{}() F_GETFL on fd {}", __func__, fd); ret = 0; if(CTX->file_map()->get(fd) ->get_flag(OpenFile_flags::rdonly)) { @@ -572,7 +653,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { return ret; case F_SETFD: - CTX->log()->trace("{}() [fd: {}, cmd: F_SETFD, FD_CLOEXEC: {}]", + LOG(DEBUG, "{}() [fd: {}, cmd: F_SETFD, FD_CLOEXEC: {}]", __func__, fd, (arg & FD_CLOEXEC)); CTX->file_map()->get(fd) ->set_flag(OpenFile_flags::cloexec, (arg & FD_CLOEXEC)); @@ -580,7 +661,7 @@ int hook_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) { default: - CTX->log()->error("{}() unrecognized command {} on fd {}", + LOG(ERROR, "{}() unrecognized command {} on fd {}", __func__, cmd, fd); return -ENOTSUP; } @@ -590,8 +671,9 @@ int hook_renameat(int olddfd, const char * oldname, int newdfd, const char * newname, unsigned int flags) { - CTX->log()->trace("{}() called with olddfd {}, oldname: '{}', newfd {}, newname '{}', flags {}", - __func__, olddfd, oldname, newdfd, newname, flags); + LOG(DEBUG, "{}() called with olddfd: 
{}, oldname: \"{}\", newfd: {}, " + "newname \"{}\", flags {}", + __func__, olddfd, oldname, newdfd, newname, flags); const char * oldpath_pass; std::string oldpath_resolved; @@ -609,11 +691,11 @@ int hook_renameat(int olddfd, const char * oldname, return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() not supported", __func__); + LOG(WARNING, "{}() not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } @@ -633,11 +715,11 @@ int hook_renameat(int olddfd, const char * oldname, return -ENOTDIR; case RelativizeStatus::internal: - CTX->log()->warn("{}() not supported", __func__); + LOG(WARNING, "{}() not supported", __func__); return -ENOTSUP; default: - CTX->log()->error("{}() relativize status unknown", __func__); + LOG(ERROR, "{}() relativize status unknown", __func__); return -EINVAL; } @@ -645,7 +727,10 @@ int hook_renameat(int olddfd, const char * oldname, } int hook_statfs(const char * path, struct statfs * buf) { - CTX->log()->trace("{}() called with path: {}", __func__, path); + + LOG(DEBUG, "{}() called with path: \"{}\", buf: {}", + __func__, path, fmt::ptr(buf)); + std::string rel_path; if (CTX->relativize_path(path, rel_path)) { return with_errno(adafs_statfs(buf)); @@ -654,7 +739,10 @@ int hook_statfs(const char * path, struct statfs * buf) { } int hook_fstatfs(unsigned int fd, struct statfs * buf) { - CTX->log()->trace("{}() called with fs: {}", __func__, fd); + + LOG(DEBUG, "{}() called with fd: {}, buf: {}", + __func__, fd, fmt::ptr(buf)); + if (CTX->file_map()->exist(fd)) { return with_errno(adafs_statfs(buf)); } diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 530553b98..3a620cace 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -15,45 +15,35 @@ #include "client/preload.hpp" #include "client/hooks.hpp" -#ifndef NDEBUG -#include 
"client/syscall_names.hpp" -#endif +#include #include #include #include +#include +#include -#define NOT_HOOKED 1 -#define HOOKED 0 - -#if 0 -static void -log_write(const char *fmt, ...) -{ - int log_fd = 2; - if (log_fd < 0) - return; - - char buf[0x1000]; - int len; - va_list ap; +#include - va_start(ap, fmt); - len = vsnprintf(buf, sizeof(buf) - 1, fmt, ap); - va_end(ap); +static thread_local bool reentrance_guard_flag; +static thread_local gkfs::syscall::info saved_syscall_info; - if (len < 1) - return; - - buf[len++] = '\n'; +static constexpr void +save_current_syscall_info(gkfs::syscall::info info) { + saved_syscall_info = info; +} - syscall_no_intercept(SYS_write, log_fd, buf, len); +static constexpr void +reset_current_syscall_info() { + saved_syscall_info = gkfs::syscall::no_info; } -#endif -static __thread bool reentrance_guard_flag; +static inline gkfs::syscall::info +get_current_syscall_info() { + return saved_syscall_info; +} /* @@ -68,10 +58,18 @@ static __thread bool reentrance_guard_flag; */ static inline int hook_internal(long syscall_number, - long arg0, long arg1, long arg2, - long arg3, long arg4, long arg5, - long *result) -{ + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *result) { + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + arg0, arg1, arg2, arg3, arg4, arg5 + }; +#endif + + LOG(SYSCALL, gkfs::syscall::from_internal_code | gkfs::syscall::to_hook | + gkfs::syscall::not_executed, syscall_number, args); switch (syscall_number) { @@ -81,9 +79,10 @@ hook_internal(long syscall_number, static_cast(arg1), static_cast(arg2)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_creat: @@ -92,9 +91,10 @@ hook_internal(long syscall_number, O_WRONLY | O_CREAT | O_TRUNC, static_cast(arg1)); - if(*result != -1) { - CTX->register_internal_fd(*result); + 
if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_openat: @@ -104,29 +104,40 @@ hook_internal(long syscall_number, static_cast(arg2), static_cast(arg3)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; - // epoll_create and epoll_create1 have the same prototype case SYS_epoll_create: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + + break; + case SYS_epoll_create1: *result = syscall_no_intercept(syscall_number, static_cast(arg0)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_dup: *result = syscall_no_intercept(syscall_number, static_cast(arg0)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_dup2: @@ -134,9 +145,10 @@ hook_internal(long syscall_number, static_cast(arg0), static_cast(arg1)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_dup3: @@ -145,26 +157,29 @@ hook_internal(long syscall_number, static_cast(arg1), static_cast(arg2)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_inotify_init: *result = syscall_no_intercept(syscall_number); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_inotify_init1: *result = syscall_no_intercept(syscall_number, static_cast(arg0)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } + break; case SYS_perf_event_open: @@ 
-175,8 +190,8 @@ hook_internal(long syscall_number, static_cast(arg3), static_cast(arg4)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } break; @@ -185,8 +200,8 @@ hook_internal(long syscall_number, static_cast(arg0), reinterpret_cast(arg1)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } break; @@ -196,8 +211,8 @@ hook_internal(long syscall_number, reinterpret_cast(arg1), static_cast(arg2)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } break; @@ -206,41 +221,104 @@ hook_internal(long syscall_number, static_cast(arg0), static_cast(arg1)); - if(*result != -1) { - CTX->register_internal_fd(*result); + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); } break; + + case SYS_socket: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + static_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_socketpair: + + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + static_cast(arg2), + reinterpret_cast(arg3)); + + if(*result >= 0) { + reinterpret_cast(arg3)[0] = + CTX->register_internal_fd(reinterpret_cast(arg3)[0]); + reinterpret_cast(arg3)[1] = + CTX->register_internal_fd(reinterpret_cast(arg3)[1]); + } + + break; + + case SYS_pipe: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0)); + + if(*result >= 0) { + reinterpret_cast(arg0)[0] = + CTX->register_internal_fd(reinterpret_cast(arg0)[0]); + reinterpret_cast(arg0)[1] = + CTX->register_internal_fd(reinterpret_cast(arg0)[1]); + } + + break; + + case SYS_pipe2: + + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + static_cast(arg1)); + if(*result >= 0) { + 
reinterpret_cast(arg0)[0] = + CTX->register_internal_fd(reinterpret_cast(arg0)[0]); + reinterpret_cast(arg0)[1] = + CTX->register_internal_fd(reinterpret_cast(arg0)[1]); + } + + break; + + case SYS_accept: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_close: *result = syscall_no_intercept(syscall_number, static_cast(arg0)); - CTX->unregister_internal_fd(*result); + + if(*result == 0) { + CTX->unregister_internal_fd(arg0); + } break; default: - /* - * Ignore any other syscalls - * i.e.: pass them on to the kernel - * as would normally happen. - */ - - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Passthrough", - syscall_names[syscall_number], syscall_number); - #endif - return NOT_HOOKED; + // ignore any other syscalls, i.e.: pass them on to the kernel + // (syscalls forwarded to the kernel that return are logged in + // hook_forwarded_syscall()) + ::save_current_syscall_info( + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed); + return gkfs::syscall::forward_to_kernel; } - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Intercepted", - syscall_names[syscall_number], syscall_number); - #endif - -#if 0 - log_write("Internal syscall [%s, %d] = %d", syscall_names[syscall_number]);//, syscall_number, *result); -#endif + LOG(SYSCALL, gkfs::syscall::from_internal_code | + gkfs::syscall::to_hook | gkfs::syscall::executed, + syscall_number, args, *result); - return HOOKED; + return gkfs::syscall::hooked; } @@ -253,11 +331,36 @@ static inline int hook(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, - long *result) -{ + long *result) { + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + arg0, arg1, arg2, arg3, arg4, arg5 + }; +#endif + + 
LOG(SYSCALL, gkfs::syscall::from_external_code | + gkfs::syscall::to_hook | gkfs::syscall::not_executed, + syscall_number, args); switch (syscall_number) { + case SYS_execve: + *result = syscall_no_intercept(syscall_number, + reinterpret_cast(arg0), + reinterpret_cast(arg1), + reinterpret_cast(arg2)); + break; + + case SYS_execveat: + *result = syscall_no_intercept(syscall_number, + arg0, + reinterpret_cast(arg1), + reinterpret_cast(arg2), + reinterpret_cast(arg3), + arg4); + break; + case SYS_open: *result = hook_openat(AT_FDCWD, reinterpret_cast(arg0), @@ -290,7 +393,7 @@ int hook(long syscall_number, case SYS_lstat: *result = hook_lstat(reinterpret_cast(arg0), - reinterpret_cast(arg1)); + reinterpret_cast(arg1)); break; case SYS_fstat: @@ -430,8 +533,8 @@ int hook(long syscall_number, case SYS_mkdirat: *result = hook_mkdirat(static_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); + reinterpret_cast(arg1), + static_cast(arg2)); break; case SYS_mkdir: @@ -467,27 +570,27 @@ int hook(long syscall_number, case SYS_getcwd: *result = hook_getcwd(reinterpret_cast(arg0), - static_cast(arg1)); + static_cast(arg1)); break; case SYS_readlink: *result = hook_readlinkat(AT_FDCWD, - reinterpret_cast(arg0), - reinterpret_cast(arg1), - static_cast(arg2)); + reinterpret_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); break; case SYS_readlinkat: *result = hook_readlinkat(static_cast(arg0), - reinterpret_cast(arg1), - reinterpret_cast(arg2), - static_cast(arg3)); + reinterpret_cast(arg1), + reinterpret_cast(arg2), + static_cast(arg3)); break; case SYS_fcntl: *result = hook_fcntl(static_cast(arg0), - static_cast(arg1), - static_cast(arg2)); + static_cast(arg1), + static_cast(arg2)); break; case SYS_rename: @@ -525,22 +628,123 @@ int hook(long syscall_number, break; default: - /* - * Ignore any other syscalls - * i.e.: pass them on to the kernel - * as would normally happen. 
- */ - - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Passthrough", syscall_names[syscall_number], syscall_number); - #endif - return NOT_HOOKED; + // ignore any other syscalls, i.e.: pass them on to the kernel + // (syscalls forwarded to the kernel that return are logged in + // hook_forwarded_syscall()) + ::save_current_syscall_info( + gkfs::syscall::from_external_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed); + return gkfs::syscall::forward_to_kernel; } - #ifndef NDEBUG - CTX->log()->trace("Syscall [{}, {}] Intercepted", syscall_names[syscall_number], syscall_number); - #endif - return HOOKED; + LOG(SYSCALL, gkfs::syscall::from_external_code | + gkfs::syscall::to_hook | gkfs::syscall::executed, + syscall_number, args, *result); + + return gkfs::syscall::hooked; +} + +static void +hook_forwarded_syscall(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long result) +{ + + if(::get_current_syscall_info() == gkfs::syscall::no_info) { + return; + } + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + arg0, arg1, arg2, arg3, arg4, arg5 + }; +#endif + + LOG(SYSCALL, + ::get_current_syscall_info() | + gkfs::syscall::executed, + syscall_number, args, result); + + ::reset_current_syscall_info(); +} + +static void +hook_clone_at_child(unsigned long flags, + void* child_stack, + int* ptid, + int* ctid, + long newtls) { + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + static_cast(flags), + reinterpret_cast(child_stack), + reinterpret_cast(ptid), + reinterpret_cast(ctid), + static_cast(newtls), + 0}; +#endif + + LOG(SYSCALL, + ::get_current_syscall_info() | + gkfs::syscall::executed, + SYS_clone, args, 0); +} + +static void +hook_clone_at_parent(unsigned long flags, + void* child_stack, + int* ptid, + int* ctid, + long newtls, + long returned_pid) { + +#if 
!defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + const long args[gkfs::syscall::MAX_ARGS] = { + static_cast(flags), + reinterpret_cast(child_stack), + reinterpret_cast(ptid), + reinterpret_cast(ctid), + static_cast(newtls), + 0}; +#endif + + LOG(SYSCALL, + ::get_current_syscall_info() | + gkfs::syscall::executed, + SYS_clone, args, returned_pid); +} + + +int +internal_hook_guard_wrapper(long syscall_number, + long arg0, long arg1, long arg2, + long arg3, long arg4, long arg5, + long *syscall_return_value) { + assert(CTX->interception_enabled()); + + + if (reentrance_guard_flag) { + ::save_current_syscall_info( + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed); + return gkfs::syscall::forward_to_kernel; + } + + int was_hooked = 0; + + reentrance_guard_flag = true; + int oerrno = errno; + was_hooked = hook_internal(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); + errno = oerrno; + reentrance_guard_flag = false; + + return was_hooked; } @@ -562,50 +766,63 @@ int hook_guard_wrapper(long syscall_number, long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, - long *syscall_return_value) -{ - assert(CTX->interception_enabled()); + long *syscall_return_value) { -#if 0 - log_write("syscall %s called from %s", - syscall_names[syscall_number], - reentrance_guard_flag ? 
"gkfs" : "client"); -#endif + assert(CTX->interception_enabled()); - int is_hooked; + int was_hooked = 0; if (reentrance_guard_flag) { int oerrno = errno; - is_hooked = hook_internal(syscall_number, - arg0, arg1, arg2, arg3, arg4, arg5, - syscall_return_value); + was_hooked = hook_internal(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); errno = oerrno; - return is_hooked; + return was_hooked; } reentrance_guard_flag = true; int oerrno = errno; - is_hooked = hook(syscall_number, - arg0, arg1, arg2, arg3, arg4, arg5, - syscall_return_value); + was_hooked = hook(syscall_number, + arg0, arg1, arg2, arg3, arg4, arg5, + syscall_return_value); errno = oerrno; reentrance_guard_flag = false; - return is_hooked; + return was_hooked; } +void start_self_interception() { + + LOG(DEBUG, "Enabling syscall interception for self"); + + intercept_hook_point = internal_hook_guard_wrapper; + intercept_hook_point_post_kernel = hook_forwarded_syscall; + intercept_hook_point_clone_child = hook_clone_at_child; + intercept_hook_point_clone_parent = hook_clone_at_parent; +} void start_interception() { + assert(CTX->interception_enabled()); -#ifndef NDEBUG - CTX->log()->debug("Activating interception of syscalls"); -#endif + + LOG(DEBUG, "Enabling syscall interception for client process"); + // Set up the callback function pointer intercept_hook_point = hook_guard_wrapper; + intercept_hook_point_post_kernel = hook_forwarded_syscall; + intercept_hook_point_clone_child = hook_clone_at_child; + intercept_hook_point_clone_parent = hook_clone_at_parent; } void stop_interception() { assert(CTX->interception_enabled()); + + LOG(DEBUG, "Disabling syscall interception for client process"); + // Reset callback function pointer intercept_hook_point = nullptr; + intercept_hook_point_post_kernel = nullptr; + intercept_hook_point_clone_child = nullptr; + intercept_hook_point_clone_parent = nullptr; } diff --git a/src/client/logging.cpp b/src/client/logging.cpp new file mode 
100644 index 000000000..4e8cf2a0a --- /dev/null +++ b/src/client/logging.cpp @@ -0,0 +1,310 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#include +#include +#include +#include +#include +#include + +namespace gkfs { +namespace log { + +struct opt_info { + const char name_[32]; + const std::size_t length_; + const char help_text_[8][64]; + const log_level mask_; +}; + +#define STR_AND_LEN(strbuf) \ + strbuf, sizeof(strbuf) - 1 + +static const auto constexpr debug_opts = utils::make_array( + + opt_info{STR_AND_LEN("none"), + {"don't print any messages"}, + log::none}, + +#ifdef GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("syscalls"), + {"Trace system calls: print the name of each system call,", + "its arguments, and its return value. All system calls are", + "printed after being executed save for those that may not", + "return, such as execve() and execve_at()", + "[ default: off ]"}, + log::syscall}, + + opt_info{STR_AND_LEN("syscalls_at_entry"), + {"Trace system calls: print the name of each system call", + "and its arguments. 
All system calls are printed before ", + "being executed and therefore their return values are not", + "available in the log", + "[ default: off ]"}, + log::syscall_at_entry}, + +#endif // !GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("info"), + {"Print information messages", + "[ default: on ]"}, + log::info}, + + opt_info{STR_AND_LEN("critical"), + {"Print critical errors", + "[ default: on ]"}, + log::critical}, + + opt_info{STR_AND_LEN("errors"), + {"Print errors", + "[ default: on ]"}, + log::error}, + + opt_info{STR_AND_LEN("warnings"), + {"Print warnings", + "[ default: on ]"}, + log::warning}, + + opt_info{STR_AND_LEN("mercury"), + {"Print Mercury messages", + "[ default: on ]"}, + log::mercury}, + +#ifdef GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("debug"), + {"Print debug messages", + "[ default: off ]"}, + log::debug}, + + opt_info{STR_AND_LEN("most"), + {"All previous options except 'syscalls_at_entry' combined."}, + log::most }, + +#endif // !GKFS_DEBUG_BUILD + + opt_info{STR_AND_LEN("all"), + {"All previous options combined."}, + log::all }, + + opt_info{STR_AND_LEN("help"), + {"Print this help message and exit."}, + log::help} +); + +static const auto constexpr max_debug_opt_length = + sizeof("syscalls_at_entry") - 1; + +static const auto constexpr max_help_text_rows = + sizeof(debug_opts[0].help_text_) / sizeof(debug_opts[0].help_text_[0]); + +/** + * process_log_options -- process the string given as parameter to determine + * which debugging options are enabled and return a + * log_level describing them + */ +log_level +process_log_options(const std::string gkfs_debug) { + +#ifdef GKFS_DISABLE_LOGGING + + (void) gkfs_debug; + logger::log_message(stdout, "warning: logging options ignored: " + "logging support was disabled in this build"); + return log::none; + +#endif // ! 
GKFS_DISABLE_LOGGING + + log_level dm = log::none; + + std::vector tokens; + + // skip separating white spaces and commas + boost::split(tokens, gkfs_debug, + [](char c) { return c == ' ' || c == ','; }); + + for(const auto& t : tokens) { + + bool is_known = false; + + for(const auto& opt : debug_opts) { + if(t == opt.name_) { + dm |= opt.mask_; + is_known = true; + break; + } + } + + if(!is_known) { + logger::log_message(stdout, "warning: logging option '{}' unknown; " + "try {}=help", t, gkfs::env::LOG); + } + } + + if(!!(dm & log::help)) { + logger::log_message(stdout, "Valid options for the {} " + "environment variable are:\n", gkfs::env::LOG); + + + for(const auto& opt : debug_opts) { + const auto padding = max_debug_opt_length - opt.length_ + 2; + + logger::log_message(stdout, " {}{:>{}}{}", opt.name_, "", + padding, opt.help_text_[0]); + + for(auto i = 1lu; i < max_help_text_rows; ++i) { + if(opt.help_text_[i][0] != 0) { + logger::log_message(stdout, " {:>{}}{}", "", + max_debug_opt_length + 2, + opt.help_text_[i]); + } + } + + logger::log_message(stdout, ""); + } + + logger::log_message(stdout, "\n" + "To direct the logging output into a file " + "instead of standard output\n" + "a filename can be specified using the " + "{} environment variable.", gkfs::env::LOG_OUTPUT); + ::_exit(0); + } + + return dm; +} + + +logger::logger(const std::string& opts, + const std::string& path, + bool trunc) : + timezone_(date::current_zone()) { + + /* use stderr (dup()ed to an internal fd) by default */ + log_fd_ = ::dup(2); + + if(log_fd_ == -1) { + log(gkfs::log::error, __func__, __LINE__, "Failed to dup stderr. 
" + "Logging will fall back to normal stderr", path); + log_fd_ = 2; + } + + log_mask_ = process_log_options(opts); + + if(!path.empty()) { + int flags = O_CREAT | O_RDWR | O_APPEND | O_TRUNC; + + if(!trunc) { + flags &= ~O_TRUNC; + } + + // we use ::open() here rather than ::syscall_no_intercept(SYS_open) + // because we want the call to be intercepted by our hooks, which + // allows us to categorize the resulting fd as 'internal' + int fd = ::open(path.c_str(), flags, 0600); + + if(fd == -1) { + log(gkfs::log::error, __func__, __LINE__, "Failed to open log " + "file '{}'. Logging will fall back to stderr", path); + return; + } + + log_fd_ = fd; + } + + // Finding the current timezone implies accessing OS files (i.e. syscalls), + // but current_zone() doesn't actually retrieve the time zone but rather + // provides a descriptor to it that is **atomically initialized** upon its + // first use. Thus, if we don't force the initialization here, logging + // the first intercepted syscall will produce a call to + // date::time_zone::init() (under std::call_once) which internally ends up + // calling fopen(). Since fopen() ends up calling sys_open(), we will need + // to generate another timestamp for a system call log entry, which will + // attempt to call date::time_zone::init() since the prior initialization + // (under the same std::call_once) has not yet completed. + // + // Unfortunately, date::time_zone doesn't provide a function to prevent + // this lazy initialization, therefore we force it by requesting + // information from an arbitrary timepoint (January 1st 1970) which forces + // the initialization. This doesn't do any actual work and could safely + // be removed if the date API ends up providing this functionality. 
+ using namespace date; + timezone_->get_info(date::sys_days{January/1/1970}); +} + +logger::~logger() { + log_fd_ = ::syscall_no_intercept(SYS_close, log_fd_); +} + +void +logger::log_syscall(syscall::info info, + const long syscall_number, + const long args[6], + boost::optional result) { + + + const bool log_syscall_entry = !!(gkfs::log::syscall_at_entry & log_mask_); + const bool log_syscall_result = !!(gkfs::log::syscall & log_mask_); + + // log the syscall if and only if logging for syscalls is enabled + if(!log_syscall_entry && !log_syscall_result) { + return; + } + + // log the syscall even if we don't have information on it, since it may + // be important to the user (we assume that the syscall has completed + // though) + if(info == gkfs::syscall::no_info) { + goto print_syscall; + } + + // log the syscall entry if the syscall may not return (e.g. execve), + // even if log::syscall_entry is disabled + if(gkfs::syscall::may_not_return(syscall_number)) { + goto print_syscall; + } + + if(log_syscall_entry && gkfs::syscall::execution_is_pending(info)) { + goto print_syscall; + } + + if(log_syscall_result && !gkfs::syscall::execution_is_pending(info)) { + goto print_syscall; + } + + return; + +print_syscall: + + detail::safe_buffer buffer; + + detail::format_timestamp_to(buffer, timezone_); + detail::format_syscall_info_to(buffer, info); + + if(result) { + syscall::decode(buffer, syscall_number, args, *result); + } + else { + syscall::decode(buffer, syscall_number, args); + } + + fmt::format_to(buffer, "\n"); + + ::syscall_no_intercept(SYS_write, log_fd_, buffer.data(), buffer.size()); +} + +} // namespace log +} // namespace gkfs + diff --git a/src/client/open_file_map.cpp b/src/client/open_file_map.cpp index 5a13e8b28..3f2678a18 100644 --- a/src/client/open_file_map.cpp +++ b/src/client/open_file_map.cpp @@ -17,6 +17,7 @@ #include #include #include +#include using namespace std; @@ -143,7 +144,7 @@ bool OpenFileMap::remove(const int fd) { files_.erase(fd); 
if (fd_validation_needed && files_.empty()) { fd_validation_needed = false; - CTX->log()->info("{}() fd_validation flag reset", __func__); + LOG(DEBUG, "fd_validation flag reset"); } return true; } @@ -188,7 +189,7 @@ int OpenFileMap::generate_fd_idx() { // We need a mutex here for thread safety std::lock_guard inode_lock(fd_idx_mutex); if (fd_idx == std::numeric_limits::max()) { - CTX->log()->info("{}() File descriptor index exceeded ints max value. Setting it back to 100000", __func__); + LOG(WARNING, "File descriptor index exceeded ints max value. Setting it back to 100000"); /* * Setting fd_idx back to 3 could have the effect that fd are given twice for different path. * This must not happen. Instead a flag is set which tells can tell the OpenFileMap that it should check diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 2f712b574..5b6535e15 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -11,7 +11,6 @@ SPDX-License-Identifier: MIT */ -#include #include #include #include @@ -20,6 +19,7 @@ #include #include #include "global/rpc/rpc_types.hpp" +#include #include #include #include @@ -28,6 +28,7 @@ #include #include +#include #include @@ -56,9 +57,34 @@ hg_id_t rpc_chunk_stat_id; std::unique_ptr ld_network_service; static inline void exit_error_msg(int errcode, const string& msg) { - CTX->log()->error(msg); - cerr << "GekkoFS error: " << msg << endl; - exit(errcode); + + LOG_ERROR("{}", msg); + gkfs::log::logger::log_message(stderr, "{}\n", msg); + + // if we don't disable interception before calling ::exit() + // syscall hooks may find an inconsistent in shared state + // (e.g. the logger) and thus, crash + stop_interception(); + CTX->disable_interception(); + ::exit(errcode); +} + +int +hg_log_function(FILE *stream, const char *fmt, ...) 
{ + +#ifdef GKFS_DISABLE_LOGGING + (void) stream; + (void) fmt; + + return 0; +#endif // GKFS_DISABLE_LOGGING + + va_list ap; + ::va_start(ap, fmt); + int n = gkfs::log::get_global_logger()->log(gkfs::log::mercury, fmt, ap); + ::va_end(ap); + + return n; } /** @@ -78,6 +104,9 @@ bool init_hermes_client(const std::string& transport_prefix) { ld_network_service = std::make_unique( hermes::get_transport_type(transport_prefix), opts); + + ld_network_service->set_mercury_log_function(::hg_log_function); + ld_network_service->run(); } catch (const std::exception& ex) { fmt::print(stderr, "Failed to initialize Hermes RPC client {}\n", @@ -107,57 +136,21 @@ bool init_hermes_client(const std::string& transport_prefix) { return true; } -static inline std::set -query_open_fds() { - - std::set fds; - const std::string path{"/proc/self/fd"}; - - std::unique_ptr dirp( - ::opendir(path.c_str()), - closedir); - - struct dirent entry; - struct dirent *result; - - while (::readdir_r(dirp.get(), &entry, &result) == 0 && result != NULL) { - const std::string name{entry.d_name}; - - if(name == "." || name == ".." || - std::stoi(name) == dirfd(dirp.get())) { - continue; - } - - fds.insert(std::stoi(name)); - } - - return fds; -} - - /** * This function is only called in the preload constructor and initializes * the file system client */ void init_ld_environment_() { - // Client applications such as ssh attempt to close all open file - // descriptors, which causes havoc with the interception library's internal - // state. To account for this, in the interception code we keep track of - // internal fds by distinguishing between internal syscalls (i.e. those - // coming from internal code) application syscalls. The problem is that - // at this point in initialization we have not enabled interception yet, - // but the initialization process itself needs to create file descriptors. 
- // To solve this problem, we find out which fds are created by the - // initialization process and manually protect them at this point - auto pre_init_fds = query_open_fds(); - // initialize Hermes interface to Mercury + LOG(INFO, "Initializing RPC subsystem..."); + if (!init_hermes_client(RPC_PROTOCOL)) { - exit_error_msg(EXIT_FAILURE, "Unable to initialize Hermes RPC client"); + exit_error_msg(EXIT_FAILURE, "Unable to initialize RPC subsystem"); } try { + LOG(INFO, "Loading peer addresses..."); load_hosts(); } catch (const std::exception& e) { exit_error_msg(EXIT_FAILURE, "Failed to load hosts addresses: "s + e.what()); @@ -167,62 +160,32 @@ void init_ld_environment_() { auto simple_hash_dist = std::make_shared(CTX->local_host_id(), CTX->hosts().size()); CTX->distributor(simple_hash_dist); + LOG(INFO, "Retrieving file system configuration..."); + if (!rpc_send::get_fs_config()) { exit_error_msg(EXIT_FAILURE, "Unable to fetch file system configurations from daemon process through RPC."); } - auto post_init_fds = query_open_fds(); - std::set internal_fds{3}; // fd 3 is created by the logging system - - std::set_difference(post_init_fds.begin(), post_init_fds.end(), - pre_init_fds.begin(), pre_init_fds.end(), - std::inserter(internal_fds, internal_fds.end())); - - for(const auto& fd : internal_fds) { - CTX->register_internal_fd(fd); - } - - CTX->log()->info("{}() Environment initialization successful.", __func__); + LOG(INFO, "Environment initialization successful."); } void init_ld_env_if_needed() { pthread_once(&init_env_thread, init_ld_environment_); } -void init_logging() { - std::string path; - try { - path = gkfs::get_env_own("PRELOAD_LOG_PATH"); - } catch (const std::exception& e) { - path = DEFAULT_PRELOAD_LOG_PATH; - } - - spdlog::level::level_enum level; - try { - level = get_spdlog_level(gkfs::get_env_own("LOG_LEVEL")); - } catch (const std::exception& e) { - level = get_spdlog_level(DEFAULT_DAEMON_LOG_LEVEL); - } - - auto logger_names = std::vector 
{"main"}; - - setup_loggers(logger_names, level, path); - - CTX->log(spdlog::get(logger_names.at(0))); -} - void log_prog_name() { std::string line; std::ifstream cmdline("/proc/self/cmdline"); if (!cmdline.is_open()) { - CTX->log()->error("Unable to open cmdline file"); + LOG(ERROR, "Unable to open cmdline file"); throw std::runtime_error("Unable to open cmdline file"); } if(!getline(cmdline, line)) { throw std::runtime_error("Unable to read cmdline file"); } std::replace(line.begin(), line.end(), '\0', ' '); - CTX->log()->info("Command to itercept: '{}'", line); + line.erase(line.length() - 1, line.length()); + LOG(INFO, "Process cmdline: '{}'", line); cmdline.close(); } @@ -231,14 +194,20 @@ void log_prog_name() { */ void init_preload() { - init_logging(); - CTX->log()->debug("Initialized logging subsystem"); + CTX->enable_interception(); + start_self_interception(); + + CTX->init_logging(); + + // from here ownwards it is safe to print messages + LOG(DEBUG, "Logging subsystem initialized"); + log_prog_name(); init_cwd(); - CTX->log()->debug("Current working directory: '{}'", CTX->cwd()); + + LOG(DEBUG, "Current working directory: '{}'", CTX->cwd()); init_ld_env_if_needed(); CTX->enable_interception(); - CTX->log()->debug("{}() exit", __func__); start_interception(); } @@ -249,12 +218,13 @@ void destroy_preload() { stop_interception(); CTX->disable_interception(); + LOG(DEBUG, "Syscall interception stopped"); CTX->clear_hosts(); - CTX->log()->debug("{}() About to finalize the Hermes RPC client", __func__); + LOG(DEBUG, "Peer information deleted"); ld_network_service.reset(); + LOG(DEBUG, "RPC subsystem shut down"); - CTX->log()->debug("{}() Shut down Hermes RPC client successful", __func__); - CTX->log()->info("All services shut down. Client shutdown complete."); + LOG(INFO, "All subsystems shut down. 
Client shutdown complete."); } diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index d8a5e87bc..2c7b21b2d 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -14,24 +14,46 @@ #include #include +#include +#include + +#include +#include +#include #include #include #include + #include #include +#include PreloadContext::PreloadContext(): ofm_(std::make_shared()), - fs_conf_(std::make_shared()) -{} + fs_conf_(std::make_shared()) { -void PreloadContext::log(std::shared_ptr logger) { - log_ = logger; +#ifdef USE_BITSET_FOR_INTERNAL_FDS + internal_fds_.set(); +#endif // USE_BITSET_FOR_INTERNAL_FDS + } -std::shared_ptr PreloadContext::log() const { - return log_; +void +PreloadContext::init_logging() { + + const std::string log_opts = + gkfs::env::get_var(gkfs::env::LOG, DEFAULT_CLIENT_LOG_LEVEL); + + const std::string log_output = + gkfs::env::get_var(gkfs::env::LOG_OUTPUT, DEFAULT_CLIENT_LOG_PATH); + + const std::string trunc_val = + gkfs::env::get_var(gkfs::env::LOG_OUTPUT_TRUNC); + + const bool log_trunc = !(!trunc_val.empty() && trunc_val[0] == 0); + + gkfs::log::create_global_logger(log_opts, log_output, log_trunc); } void PreloadContext::mountdir(const std::string& path) { @@ -50,7 +72,6 @@ const std::vector& PreloadContext::mountdir_components() const { } void PreloadContext::cwd(const std::string& path) { - log_->debug("Setting CWD to '{}'", path); cwd_ = path; } @@ -172,39 +193,84 @@ bool PreloadContext::interception_enabled() const { return interception_enabled_; } -void PreloadContext::register_internal_fd(int fd) { +int PreloadContext::register_internal_fd(int fd) { -#ifdef USE_BITSET_FOR_INTERNAL_FDS - internal_fds_.set(fd); + assert(fd >= 0); + + std::lock_guard lock(internal_fds_mutex_); + const int pos = internal_fds_._Find_first(); + internal_fds_.reset(pos); + + LOG(DEBUG, "registering internal fd: {} -> {}", fd, pos + INTERNAL_FD_BASE); + +#if !defined(GKFS_DISABLE_LOGGING) && 
defined(GKFS_DEBUG_BUILD) + long args[gkfs::syscall::MAX_ARGS]{fd, pos + INTERNAL_FD_BASE, O_CLOEXEC}; +#endif + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed, + SYS_dup3, args); + + const int ifd = + ::syscall_no_intercept(SYS_dup3, fd, pos + INTERNAL_FD_BASE, O_CLOEXEC); + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::executed, + SYS_dup3, args, ifd); + + assert(::syscall_error_code(ifd) != -1); + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + long args2[gkfs::syscall::MAX_ARGS]{fd}; +#endif + + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::not_executed, + SYS_close, args2); + +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) + int rv = ::syscall_no_intercept(SYS_close, fd); #else - decltype(internal_fds_)::iterator it; - bool inserted; + ::syscall_no_intercept(SYS_close, fd); +#endif - std::tie(it, inserted) = internal_fds_.insert(fd); - assert(inserted); -#endif // USE_BITSET_FOR_INTERNAL_FDS + LOG(SYSCALL, + gkfs::syscall::from_internal_code | + gkfs::syscall::to_kernel | + gkfs::syscall::executed, + SYS_close, args2, rv); + return ifd; } void PreloadContext::unregister_internal_fd(int fd) { -#ifdef USE_BITSET_FOR_INTERNAL_FDS - internal_fds_.reset(fd); -#else - std::size_t n = internal_fds_.erase(fd); - assert(n == 1); -#endif // USE_BITSET_FOR_INTERNAL_FDS + assert(fd >= INTERNAL_FD_BASE); + const auto pos = fd - INTERNAL_FD_BASE; + + std::lock_guard lock(internal_fds_mutex_); + internal_fds_.set(pos); + + LOG(DEBUG, "unregistering internal fd: {}", fd); } bool PreloadContext::is_internal_fd(int fd) const { -#ifdef USE_BITSET_FOR_INTERNAL_FDS - return internal_fds_[fd]; -#else - return internal_fds_.count(fd) != 0; -#endif // USE_BITSET_FOR_INTERNAL_FDS + if(fd < INTERNAL_FD_BASE) { + return false; + } + + const auto pos = fd - INTERNAL_FD_BASE; + 
std::lock_guard lock(internal_fds_mutex_); + return !internal_fds_.test(pos); } diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 40294cf11..155a9a821 100644 --- a/src/client/preload_util.cpp +++ b/src/client/preload_util.cpp @@ -12,6 +12,8 @@ */ #include +#include +#include #include #include #include @@ -77,7 +79,9 @@ int metadata_to_stat(const std::string& path, const Metadata& md, struct stat& a } vector> load_hosts_file(const std::string& lfpath) { - CTX->log()->debug("{}() Loading hosts file: '{}'", __func__, lfpath); + + LOG(DEBUG, "Loading hosts file: \"{}\"", lfpath); + ifstream lf(lfpath); if (!lf) { throw runtime_error(fmt::format("Failed to open hosts file '{}': {}", @@ -92,8 +96,10 @@ vector> load_hosts_file(const std::string& lfpath) { std::smatch match; while (getline(lf, line)) { if (!regex_match(line, match, line_re)) { - spdlog::error("{}() Unrecognized line format: [path: '{}', line: '{}']", - __func__, lfpath, line); + + LOG(ERROR, "Unrecognized line format: [path: '{}', line: '{}']", + lfpath, line); + throw runtime_error( fmt::format("unrecognized line format: '{}'", line)); } @@ -107,7 +113,7 @@ vector> load_hosts_file(const std::string& lfpath) { hermes::endpoint lookup_endpoint(const std::string& uri, std::size_t max_retries = 3) { - CTX->log()->debug("{}() Looking up address '{}'", __func__, uri); + LOG(DEBUG, "Looking up address \"{}\"", uri); std::random_device rd; // obtain a random number from hardware std::size_t attempts = 0; @@ -118,8 +124,10 @@ hermes::endpoint lookup_endpoint(const std::string& uri, return ld_network_service->lookup(uri); } catch (const exception& ex) { error_msg = ex.what(); - CTX->log()->warn("{}() Failed to lookup address '{}'. Attempts [{}/{}]", - __func__, uri, attempts + 1, max_retries); + + LOG(WARNING, "Failed to lookup address '{}'. 
Attempts [{}/{}]", + uri, attempts + 1, max_retries); + // Wait a random amount of time and try again std::mt19937 g(rd()); // seed the random generator std::uniform_int_distribution<> distr(50, 50 * (attempts + 2)); // define the range @@ -135,14 +143,8 @@ hermes::endpoint lookup_endpoint(const std::string& uri, void load_hosts() { string hosts_file; - try { - hosts_file = gkfs::get_env_own("HOSTS_FILE"); - } catch (const exception& e) { - CTX->log()->info("{}() Failed to get hosts file path" - " from environment, using default: '{}'", - __func__, DEFAULT_HOSTS_FILE); - hosts_file = DEFAULT_HOSTS_FILE; - } + + hosts_file = gkfs::env::get_var(gkfs::env::HOSTS_FILE, DEFAULT_HOSTS_FILE); vector> hosts; try { @@ -156,7 +158,7 @@ void load_hosts() { throw runtime_error(fmt::format("Host file empty: '{}'", hosts_file)); } - CTX->log()->info("{}() Hosts pool size: {}", __func__, hosts.size()); + LOG(INFO, "Hosts pool size: {}", hosts.size()); auto local_hostname = get_my_hostname(true); bool local_host_found = false; @@ -185,15 +187,14 @@ void load_hosts() { addrs[id] = ::lookup_endpoint(uri); if (!local_host_found && hostname == local_hostname) { - CTX->log()->debug("{}() Found local host: {}", __func__, hostname); + LOG(DEBUG, "Found local host: {}", hostname); CTX->local_host_id(id); local_host_found = true; } } if (!local_host_found) { - CTX->log()->warn("{}() Failed to find local host." - "Fallback: use host id '0' as local host", __func__); + LOG(WARNING, "Failed to find local host. 
Using host '0' as local host"); CTX->local_host_id(0); } diff --git a/src/client/resolve.cpp b/src/client/resolve.cpp index 673d7322c..b1e88df46 100644 --- a/src/client/resolve.cpp +++ b/src/client/resolve.cpp @@ -21,10 +21,10 @@ #include "global/path_util.hpp" #include "global/configure.hpp" #include "client/preload.hpp" +#include "client/logging.hpp" +#include "client/env.hpp" -constexpr static const char * ENV_NAME_CWD = ENV_PREFIX "CWD"; - /* Match components in path * * Returns the number of consecutive components at start of `path` @@ -79,7 +79,9 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c * and false otherwise. */ bool resolve_path (const std::string& path, std::string& resolved, bool resolve_last_link) { - CTX->log()->debug("{}() path: '{}'", __func__, path); + + LOG(DEBUG, "path: \"{}\", resolved: \"{}\", resolve_last_link: {}", + path, resolved, resolve_last_link); struct stat st; const std::vector& mnt_components = CTX->mountdir_components(); @@ -149,9 +151,9 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c ++matched_components; } if (lstat(resolved.c_str(), &st) < 0) { -#ifndef NDEBUG - CTX->log()->debug("{}() path does not exists: '{}'", __func__, resolved.c_str()); -#endif + + LOG(DEBUG, "path \"{}\" does not exist", resolved); + resolved.append(path, end, std::string::npos); return false; } @@ -161,7 +163,10 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c } auto link_resolved = std::unique_ptr(new char[PATH_MAX]); if (realpath(resolved.c_str(), link_resolved.get()) == nullptr) { - CTX->log()->error("{}() Failed to get realpath for link '{}'. Error: {}", __func__, resolved, strerror(errno)); + + LOG(ERROR, "Failed to get realpath for link \"{}\". 
" + "Error: {}", resolved, ::strerror(errno)); + resolved.append(path, end, std::string::npos); return false; } @@ -184,14 +189,14 @@ unsigned int path_match_components(const std::string& path, unsigned int &path_c if (matched_components >= mnt_components.size()) { resolved.erase(1, CTX->mountdir().size()); - CTX->log()->debug("{}() internal: '{}'", __func__, resolved); + LOG(DEBUG, "internal: \"{}\"", resolved); return true; } if (resolved.size() == 0) { resolved.push_back(PSP); } - CTX->log()->debug("{}() external: '{}'", __func__, resolved); + LOG(DEBUG, "external: \"{}\"", resolved); return false; } @@ -211,10 +216,12 @@ std::string get_sys_cwd() { } void set_sys_cwd(const std::string& path) { - CTX->log()->debug("{}() to '{}'", __func__, path); + + LOG(DEBUG, "Changing working directory to \"{}\"", path); + if (long ret = syscall_no_intercept(SYS_chdir, path.c_str())) { - CTX->log()->error("{}() failed to set system current working directory: {}", - __func__, std::strerror(syscall_error_code(ret))); + LOG(ERROR, "Failed to change working directory: {}", + std::strerror(syscall_error_code(ret))); throw std::system_error(syscall_error_code(ret), std::system_category(), "Failed to set system current working directory"); @@ -222,10 +229,12 @@ void set_sys_cwd(const std::string& path) { } void set_env_cwd(const std::string& path) { - CTX->log()->debug("{}() to '{}'", __func__, path); - if(setenv(ENV_NAME_CWD, path.c_str(), 1)) { - CTX->log()->error("{}() failed to set environment current working directory: {}", - __func__, std::strerror(errno)); + + LOG(DEBUG, "Setting {} to \"{}\"", gkfs::env::CWD, path); + + if(setenv(gkfs::env::CWD, path.c_str(), 1)) { + LOG(ERROR, "Failed while setting {}: {}", + gkfs::env::CWD, std::strerror(errno)); throw std::system_error(errno, std::system_category(), "Failed to set environment current working directory"); @@ -233,10 +242,14 @@ void set_env_cwd(const std::string& path) { } void unset_env_cwd() { - CTX->log()->debug("{}()", 
__func__); - if(unsetenv(ENV_NAME_CWD)) { - CTX->log()->error("{}() failed to unset environment current working directory: {}", - __func__, std::strerror(errno)); + + LOG(DEBUG, "Clearing {}()", gkfs::env::CWD); + + if(unsetenv(gkfs::env::CWD)) { + + LOG(ERROR, "Failed to clear {}: {}", + gkfs::env::CWD, std::strerror(errno)); + throw std::system_error(errno, std::system_category(), "Failed to unset environment current working directory"); @@ -244,7 +257,7 @@ void unset_env_cwd() { } void init_cwd() { - const char* env_cwd = std::getenv(ENV_NAME_CWD); + const char* env_cwd = std::getenv(gkfs::env::CWD); if (env_cwd != nullptr) { CTX->cwd(env_cwd); } else { diff --git a/src/client/rpc/ld_rpc_data_ws.cpp b/src/client/rpc/ld_rpc_data_ws.cpp index ebd4de556..c080150d5 100644 --- a/src/client/rpc/ld_rpc_data_ws.cpp +++ b/src/client/rpc/ld_rpc_data_ws.cpp @@ -18,10 +18,10 @@ #include #include #include +#include #include - namespace rpc_send { @@ -94,7 +94,7 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, ld_network_service->expose(bufseq, hermes::access_mode::read_only); } catch (const std::exception& ex) { - CTX->log()->error("{}() Failed to expose buffers for RMA", __func__); + LOG(ERROR, "Failed to expose buffers for RMA"); errno = EBUSY; return -1; } @@ -125,7 +125,7 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); gkfs::rpc::write_data::input in( path, @@ -152,15 +152,12 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, handles.emplace_back( ld_network_service->post(endp, in)); - CTX->log()->trace("{}() host: {}, path: {}, chunks: {}, size: {}, " - "offset: {}", __func__, - target, path, in.chunk_n(), - total_chunk_size, in.offset()); + LOG(DEBUG, "host: {}, path: \"{}\", chunks: {}, size: {}, offset: {}", + target, path, in.chunk_n(), total_chunk_size, in.offset()); } catch(const 
std::exception& ex) { - CTX->log()->error("{}() Unable to send non-blocking rpc for " - "path {} and recipient {}", __func__, path, - target); + LOG(ERROR, "Unable to send non-blocking rpc for " + "path \"{}\" [peer: {}]", path, target); errno = EBUSY; return -1; } @@ -180,8 +177,7 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, auto out = h.get().at(0); if(out.err() != 0) { - CTX->log()->error("{}() Daemon reported error: {}", - __func__, out.err()); + LOG(ERROR, "Daemon reported error: {}", out.err()); error = true; errno = out.err(); } @@ -189,8 +185,8 @@ ssize_t write(const string& path, const void* buf, const bool append_flag, out_size += static_cast(out.io_size()); } catch(const std::exception& ex) { - CTX->log()->error("{}() Failed to get rpc output for path {} " - "recipient {}", __func__, path, targets[idx]); + LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", + path, targets[idx]); error = true; errno = EIO; } @@ -257,7 +253,7 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r ld_network_service->expose(bufseq, hermes::access_mode::write_only); } catch (const std::exception& ex) { - CTX->log()->error("{}() Failed to expose buffers for RMA", __func__); + LOG(ERROR, "Failed to expose buffers for RMA"); errno = EBUSY; return -1; } @@ -288,7 +284,7 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); gkfs::rpc::read_data::input in( path, @@ -315,15 +311,12 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r handles.emplace_back( ld_network_service->post(endp, in)); - CTX->log()->trace("{}() host: {}, path: {}, chunks: {}, size: {}, " - "offset: {}", __func__, - target, path, in.chunk_n(), - total_chunk_size, in.offset()); + LOG(DEBUG, "host: {}, path: {}, chunks: {}, size: {}, offset: {}", + target, path, in.chunk_n(), 
total_chunk_size, in.offset()); } catch(const std::exception& ex) { - CTX->log()->error("{}() Unable to send non-blocking rpc for " - "path {} and recipient {}", __func__, path, - target); + LOG(ERROR, "Unable to send non-blocking rpc for path \"{}\" " + "[peer: {}]", path, target); errno = EBUSY; return -1; } @@ -343,8 +336,7 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r auto out = h.get().at(0); if(out.err() != 0) { - CTX->log()->error("{}() Daemon reported error: {}", - __func__, out.err()); + LOG(ERROR, "Daemon reported error: {}", out.err()); error = true; errno = out.err(); } @@ -352,8 +344,8 @@ ssize_t read(const string& path, void* buf, const off64_t offset, const size_t r out_size += static_cast(out.io_size()); } catch(const std::exception& ex) { - CTX->log()->error("{}() Failed to get rpc output for path {} " - "recipient {}", __func__, path, targets[idx]); + LOG(ERROR, "Failed to get rpc output for path \"{}\" [peer: {}]", + path, targets[idx]); error = true; errno = EIO; } @@ -387,7 +379,7 @@ int trunc_data(const std::string& path, size_t current_size, size_t new_size) { auto endp = CTX->hosts().at(host); try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); gkfs::rpc::trunc_data::input in(path, new_size); @@ -402,8 +394,7 @@ int trunc_data(const std::string& path, size_t current_size, size_t new_size) { } catch (const std::exception& ex) { // TODO(amiranda): we should cancel all previously posted requests // here, unfortunately, Hermes does not support it yet :/ - CTX->log()->error("{}() Failed to send request to host: {}", - __func__, host); + LOG(ERROR, "Failed to send request to host: {}", host); errno = EIO; return -1; } @@ -419,13 +410,12 @@ int trunc_data(const std::string& path, size_t current_size, size_t new_size) { auto out = h.get().at(0); if(out.err() != 0) { - CTX->log()->error("{}() received error response: {}", - __func__, out.err()); + LOG(ERROR, "received 
error response: {}", out.err()); error = true; errno = EIO; } } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); error = true; errno = EIO; } @@ -436,14 +426,11 @@ int trunc_data(const std::string& path, size_t current_size, size_t new_size) { ChunkStat chunk_stat() { - CTX->log()->trace("{}()", __func__); - std::vector> handles; for (const auto& endp : CTX->hosts()) { try { - CTX->log()->trace("{}() Sending RPC to host: {}", - __func__, endp.to_string()); + LOG(DEBUG, "Sending RPC to host: {}", endp.to_string()); gkfs::rpc::chunk_stat::input in(0); @@ -458,8 +445,7 @@ ChunkStat chunk_stat() { } catch (const std::exception& ex) { // TODO(amiranda): we should cancel all previously posted requests // here, unfortunately, Hermes does not support it yet :/ - CTX->log()->error("{}() Failed to send request to host: {}", - __func__, endp.to_string()); + LOG(ERROR, "Failed to send request to host: {}", endp.to_string()); throw std::runtime_error("Failed to forward non-blocking rpc request"); } } diff --git a/src/client/rpc/ld_rpc_management.cpp b/src/client/rpc/ld_rpc_management.cpp index 544043318..6e52aaca6 100644 --- a/src/client/rpc/ld_rpc_management.cpp +++ b/src/client/rpc/ld_rpc_management.cpp @@ -13,6 +13,7 @@ #include "client/rpc/ld_rpc_management.hpp" #include "global/rpc/rpc_types.hpp" +#include #include #include // see https://github.com/boostorg/tokenizer/issues/9 #include @@ -34,7 +35,7 @@ bool get_fs_config() { gkfs::rpc::fs_config::output out; try { - CTX->log()->debug("{}() Retrieving file system configurations from daemon", __func__); + LOG(DEBUG, "Retrieving file system configurations from daemon"); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can retry // for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -42,12 +43,12 @@ bool get_fs_config() { // result_set. 
When that happens we can remove the .at(0) :/ out = ld_network_service->post(endp).get().at(0); } catch (const std::exception& ex) { - CTX->log()->error("{}() Retrieving fs configurations from daemon", __func__); + LOG(ERROR, "Retrieving fs configurations from daemon"); return false; } CTX->mountdir(out.mountdir()); - CTX->log()->info("Mountdir: '{}'", CTX->mountdir()); + LOG(INFO, "Mountdir: '{}'", CTX->mountdir()); CTX->fs_conf()->rootdir = out.rootdir(); CTX->fs_conf()->atime_state = out.atime_state(); @@ -58,7 +59,7 @@ bool get_fs_config() { CTX->fs_conf()->uid = out.uid(); CTX->fs_conf()->gid = out.gid(); - CTX->log()->debug("{}() Got response with mountdir {}", __func__, out.mountdir()); + LOG(DEBUG, "Got response with mountdir {}", out.mountdir()); return true; } diff --git a/src/client/rpc/ld_rpc_metadentry.cpp b/src/client/rpc/ld_rpc_metadentry.cpp index b41faf8a9..20bae8276 100644 --- a/src/client/rpc/ld_rpc_metadentry.cpp +++ b/src/client/rpc/ld_rpc_metadentry.cpp @@ -14,6 +14,7 @@ #include #include #include "client/preload.hpp" +#include "client/logging.hpp" #include "client/preload_util.hpp" #include "client/open_dir.hpp" #include @@ -32,7 +33,7 @@ int mk_node(const std::string& path, const mode_t mode) { CTX->distributor()->locate_file_metadata(path)); try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -41,10 +42,10 @@ int mk_node(const std::string& path, const mode_t mode) { auto out = ld_network_service->post(endp, path, mode).get().at(0); err = out.err(); - CTX->log()->debug("{}() Got response success: {}", __func__, err); + LOG(DEBUG, "Got response success: {}", err); } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno 
= EBUSY; return -1; } @@ -58,7 +59,7 @@ int stat(const std::string& path, string& attr) { CTX->distributor()->locate_file_metadata(path)); try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -66,7 +67,7 @@ int stat(const std::string& path, string& attr) { // result_set. When that happens we can remove the .at(0) :/ auto out = ld_network_service->post(endp, path).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got response success: {}", out.err()); if(out.err() != 0) { errno = out.err(); @@ -77,7 +78,7 @@ int stat(const std::string& path, string& attr) { return 0; } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } @@ -92,7 +93,7 @@ int decr_size(const std::string& path, size_t length) { try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -102,7 +103,7 @@ int decr_size(const std::string& path, size_t length) { ld_network_service->post( endp, path, length).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got response success: {}", out.err()); if(out.err() != 0) { errno = out.err(); @@ -112,7 +113,7 @@ int decr_size(const std::string& path, size_t length) { return 0; } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } @@ -130,7 +131,7 @@ int rm_node(const 
std::string& path, const bool remove_metadentry_only, const ss try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -139,7 +140,7 @@ int rm_node(const std::string& path, const bool remove_metadentry_only, const ss auto out = ld_network_service->post(endp, path).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got response success: {}", out.err()); if(out.err() != 0) { errno = out.err(); @@ -149,7 +150,7 @@ int rm_node(const std::string& path, const bool remove_metadentry_only, const ss return 0; } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } @@ -159,62 +160,64 @@ int rm_node(const std::string& path, const bool remove_metadentry_only, const ss std::vector> handles; - if ((size / CHUNKSIZE) < CTX->hosts().size()) { // Small files - auto endp = CTX->hosts().at( - CTX->distributor()->locate_file_metadata(path)); + // Small files + if(static_cast(size / CHUNKSIZE) < CTX->hosts().size()) { - try { - CTX->log()->trace("{}() Sending RPC to host: {}", - __func__, endp.to_string()); - gkfs::rpc::remove::input in(path); - handles.emplace_back(ld_network_service->post(endp,in)); - - auto chnk_start = 0; - auto chnk_end = size/CHUNKSIZE; - - for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { - auto target = CTX->hosts().at(CTX->distributor()->locate_data(path, chnk_id)); - - CTX->log()->trace("{}() Sending RPC to host: {}", - __func__, target.to_string()); - - handles.emplace_back( - ld_network_service->post(target, in)); - } - } catch (const std::exception & ex) { - CTX->log()->error("{}() Failed to send reduced remove requests", - __func__); - throw 
std::runtime_error("Failed to forward non-blocking rpc request"); - } - } - else { // "Big" files - for (const auto& endp : CTX->hosts()) { - try { - CTX->log()->trace("{}() Sending RPC to host: {}", - __func__, endp.to_string()); + auto endp = CTX->hosts().at( + CTX->distributor()->locate_file_metadata(path)); + try { + LOG(DEBUG, "Sending RPC to host: {}", endp.to_string()); gkfs::rpc::remove::input in(path); + handles.emplace_back( + ld_network_service->post(endp,in)); - // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that - // we can retry for RPC_TRIES (see old commits with margo) - // TODO(amiranda): hermes will eventually provide a post(endpoint) - // returning one result and a broadcast(endpoint_set) returning a - // result_set. When that happens we can remove the .at(0) :/ - // - // + uint64_t chnk_start = 0; + uint64_t chnk_end = size/CHUNKSIZE; - handles.emplace_back( - ld_network_service->post(endp, in)); + for (uint64_t chnk_id = chnk_start; chnk_id <= chnk_end; chnk_id++) { + const auto target = CTX->hosts().at( + CTX->distributor()->locate_data(path, chnk_id)); - } catch (const std::exception& ex) { - // TODO(amiranda): we should cancel all previously posted requests - // here, unfortunately, Hermes does not support it yet :/ - CTX->log()->error("{}() Failed to send request to host: {}", - __func__, endp.to_string()); - throw std::runtime_error("Failed to forward non-blocking rpc request"); + LOG(DEBUG, "Sending RPC to host: {}", target.to_string()); + + handles.emplace_back( + ld_network_service->post(target, in)); + } + } catch (const std::exception & ex) { + LOG(ERROR, "Failed to send reduced remove requests"); + throw std::runtime_error( + "Failed to forward non-blocking rpc request"); + } + } + else { // "Big" files + for (const auto& endp : CTX->hosts()) { + try { + LOG(DEBUG, "Sending RPC to host: {}", endp.to_string()); + + gkfs::rpc::remove::input in(path); + + // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that 
+ // we can retry for RPC_TRIES (see old commits with margo) + // TODO(amiranda): hermes will eventually provide a post(endpoint) + // returning one result and a broadcast(endpoint_set) returning a + // result_set. When that happens we can remove the .at(0) :/ + // + // + + handles.emplace_back( + ld_network_service->post(endp, in)); + + } catch (const std::exception& ex) { + // TODO(amiranda): we should cancel all previously posted requests + // here, unfortunately, Hermes does not support it yet :/ + LOG(ERROR, "Failed to send request to host: {}", + endp.to_string()); + throw std::runtime_error( + "Failed to forward non-blocking rpc request"); + } } } - } // wait for RPC responses bool got_error = false; @@ -226,13 +229,12 @@ int rm_node(const std::string& path, const bool remove_metadentry_only, const ss auto out = h.get().at(0); if(out.err() != 0) { - CTX->log()->error("{}() received error response: {}", - __func__, out.err()); + LOG(ERROR, "received error response: {}", out.err()); got_error = true; errno = out.err(); } } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); got_error = true; errno = EBUSY; } @@ -250,7 +252,7 @@ int update_metadentry(const string& path, const Metadata& md, const MetadentryUp try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -277,7 +279,7 @@ int update_metadentry(const string& path, const Metadata& md, const MetadentryUp bool_to_merc_bool(md_flags.mtime), bool_to_merc_bool(md_flags.ctime)).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got response success: {}", out.err()); if(out.err() != 0) { errno = out.err(); @@ -287,7 +289,7 @@ int 
update_metadentry(const string& path, const Metadata& md, const MetadentryUp return 0; } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } @@ -301,7 +303,7 @@ int update_metadentry_size(const string& path, const size_t size, const off64_t try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -312,7 +314,7 @@ int update_metadentry_size(const string& path, const size_t size, const off64_t endp, path, size, offset, bool_to_merc_bool(append_flag)).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got response success: {}", out.err()); if(out.err() != 0) { errno = out.err(); @@ -325,7 +327,7 @@ int update_metadentry_size(const string& path, const size_t size, const off64_t return 0; } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; ret_size = 0; return EUNKNOWN; @@ -339,7 +341,7 @@ int get_metadentry_size(const std::string& path, off64_t& ret_size) { try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -349,13 +351,13 @@ int get_metadentry_size(const std::string& path, off64_t& ret_size) { ld_network_service->post( endp, path).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got response success: {}", out.err()); ret_size = out.ret_size(); return out.err(); } catch(const 
std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; ret_size = 0; return EUNKNOWN; @@ -367,7 +369,6 @@ int get_metadentry_size(const std::string& path, off64_t& ret_size) { */ void get_dirents(OpenDir& open_dir){ - CTX->log()->trace("{}() called", __func__); auto const root_dir = open_dir.path(); auto const targets = CTX->distributor()->locate_directory_metadata(root_dir); @@ -410,7 +411,7 @@ void get_dirents(OpenDir& open_dir){ for(std::size_t i = 0; i < targets.size(); ++i) { - CTX->log()->trace("{}() target_host: {}", __func__, targets[i]); + LOG(DEBUG, "target_host: {}", targets[i]); // Setup rpc input parameters for each host auto endp = CTX->hosts().at(targets[i]); @@ -419,14 +420,12 @@ void get_dirents(OpenDir& open_dir){ try { - CTX->log()->trace("{}() Sending RPC to host: {}", - __func__, targets[i]); + LOG(DEBUG, "Sending RPC to host: {}", targets[i]); handles.emplace_back( ld_network_service->post(endp, in)); } catch(const std::exception& ex) { - CTX->log()->error("{}() Unable to send non-blocking get_dirents " - "on {} to recipient {}", - __func__, root_dir, targets[i]); + LOG(ERROR, "Unable to send non-blocking get_dirents() " + "on {} [peer: {}]", root_dir, targets[i]); throw std::runtime_error("Failed to post non-blocking RPC request"); } } @@ -494,7 +493,7 @@ int mk_symlink(const std::string& path, const std::string& target_path) { try { - CTX->log()->debug("{}() Sending RPC ...", __func__); + LOG(DEBUG, "Sending RPC ..."); // TODO(amiranda): add a post() with RPC_TIMEOUT to hermes so that we can // retry for RPC_TRIES (see old commits with margo) // TODO(amiranda): hermes will eventually provide a post(endpoint) @@ -504,7 +503,7 @@ int mk_symlink(const std::string& path, const std::string& target_path) { ld_network_service->post( endp, path, target_path).get().at(0); - CTX->log()->debug("{}() Got response success: {}", __func__, out.err()); + LOG(DEBUG, "Got 
response success: {}", out.err()); if(out.err() != 0) { errno = out.err(); @@ -514,7 +513,7 @@ int mk_symlink(const std::string& path, const std::string& target_path) { return 0; } catch(const std::exception& ex) { - CTX->log()->error("{}() while getting rpc output", __func__); + LOG(ERROR, "while getting rpc output"); errno = EBUSY; return -1; } diff --git a/src/client/syscalls/detail/syscall_info.c b/src/client/syscalls/detail/syscall_info.c new file mode 100644 index 000000000..a56122cd7 --- /dev/null +++ b/src/client/syscalls/detail/syscall_info.c @@ -0,0 +1,498 @@ +/* + Copyright 2018-2019, Barcelona Supercomputing Center (BSC), Spain + Copyright 2015-2019, Johannes Gutenberg Universitaet Mainz, Germany + + This software was partially supported by the + EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). + + This software was partially supported by the + ADA-FS project under the SPPEXA project funded by the DFG. + + SPDX-License-Identifier: MIT +*/ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define SYSCALL(id, nargs, ret, ...) 
\ + [SYS_##id] = \ +{ \ + .s_nr = SYS_##id, \ + .s_name = #id, \ + .s_nargs = nargs, \ + .s_return_type = ret, \ + .s_args = {__VA_ARGS__} \ +} + +#define S_NOARGS() {0} + +#define S_UARG(t) \ +{ \ + .a_type = t, \ + .a_name = #t \ +} + +#define S_NARG(t, n) \ +{ \ + .a_type = t, \ + .a_name = n \ +} + +#define S_RET(t) \ +{ \ + .r_type = t \ +} + +/* Linux syscalls on x86_64 */ +const struct syscall_info syscall_table[] = { + SYSCALL(read, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count")), + SYSCALL(write, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count")), + SYSCALL(open, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags")), + SYSCALL(close, 1, S_RET(rdec), S_UARG(fd)), + SYSCALL(stat, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "statbuf")), + SYSCALL(fstat, 2, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "statbuf")), + SYSCALL(lstat, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "statbuf")), + SYSCALL(poll, 3, S_RET(rdec), S_NARG(ptr, "fds"), S_NARG(dec, "nfds"), S_NARG(dec, "timeout")), + SYSCALL(lseek, 3, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_UARG(whence)), + SYSCALL(mmap, 6, S_RET(rptr), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot"), S_NARG(mmap_flags, "flags"), S_UARG(fd), S_UARG(offset)), + SYSCALL(mprotect, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot")), + SYSCALL(munmap, 2, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length")), + SYSCALL(brk, 1, S_RET(rdec), S_NARG(ptr, "addr")), + SYSCALL(rt_sigaction, 4, S_RET(rdec), S_NARG(signum, "signum"), S_NARG(ptr, "act"), S_NARG(ptr, "oldact"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigprocmask, 4, S_RET(rdec), S_NARG(sigproc_how, "how"), S_NARG(ptr, "set"), S_NARG(ptr, "oldset"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigreturn, 0, S_RET(rnone), S_NOARGS()), + SYSCALL(ioctl, 3, S_RET(rdec), S_UARG(fd), S_NARG(arg, "cmd"), S_NARG(arg, "argp")), + SYSCALL(pread64, 4, 
S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_UARG(offset)), + SYSCALL(pwrite64, 4, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_UARG(offset)), + SYSCALL(readv, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt")), + SYSCALL(writev, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt")), + SYSCALL(access, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(pipe, 1, S_RET(rdec), S_NARG(ptr, "pipefd")), + SYSCALL(select, 5, S_RET(rdec), S_NARG(dec, "nfds"), S_NARG(ptr, "readfds"), S_NARG(ptr, "writefds"), S_NARG(ptr, "exceptfds"), S_NARG(ptr, "timeout")), + SYSCALL(sched_yield, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(mremap, 5, S_RET(rdec), S_NARG(ptr, "old_address"), S_NARG(dec, "old_size"), S_NARG(dec, "new_size"), S_NARG(arg, "flags"), S_NARG(ptr, "new_addr")), + SYSCALL(msync, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(mincore, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(ptr, "vec")), + SYSCALL(madvise, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "behavior")), + SYSCALL(shmget, 3, S_RET(rdec), S_NARG(arg, "key"), S_NARG(dec, "size"), S_NARG(arg, "flag")), + SYSCALL(shmat, 3, S_RET(rdec), S_NARG(arg, "shmid"), S_NARG(ptr, "shmaddr"), S_NARG(arg, "shmflg")), + SYSCALL(shmctl, 3, S_RET(rdec), S_NARG(arg, "shmid"), S_NARG(arg, "cmd"), S_NARG(ptr, "buf")), + SYSCALL(dup, 1, S_RET(rdec), S_NARG(fd, "oldfd")), + SYSCALL(dup2, 2, S_RET(rdec), S_NARG(fd, "oldfd"), S_NARG(fd, "newfd")), + SYSCALL(pause, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(nanosleep, 2, S_RET(rdec), S_NARG(ptr, "rqtp"), S_NARG(ptr, "rmtp")), + SYSCALL(getitimer, 2, S_RET(rdec), S_NARG(arg, "which"), S_NARG(ptr, "value")), + SYSCALL(alarm, 1, S_RET(rdec), S_NARG(dec, "seconds")), + SYSCALL(setitimer, 3, S_RET(rdec), S_NARG(arg, "which"), S_NARG(ptr, "value"), S_NARG(ptr, "ovalue")), + 
SYSCALL(getpid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(sendfile, 4, S_RET(rdec), S_NARG(fd, "out_fd"), S_NARG(fd, "in_fd"), S_NARG(ptr, "offset"), S_NARG(arg, "count")), + SYSCALL(socket, 3, S_RET(rdec), S_NARG(arg, "domain"), S_NARG(arg, "type"), S_NARG(arg, "protocol")), + SYSCALL(connect, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(arg, "addrlen")), + SYSCALL(accept, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(sendto, 5, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "dest_addr"), S_NARG(arg, "len"), S_NARG(ptr, "addr"), S_NARG(arg, "addrlen")), + SYSCALL(recvfrom, 5, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "src_addr"), S_NARG(arg, "len"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(sendmsg, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "msg"), S_NARG(arg, "flags")), + SYSCALL(recvmsg, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "msg"), S_NARG(arg, "flags")), + SYSCALL(shutdown, 2, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(arg, "how")), + SYSCALL(bind, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(arg, "addrlen")), + SYSCALL(listen, 2, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(arg, "backlog")), + SYSCALL(getsockname, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(getpeername, 3, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen")), + SYSCALL(socketpair, 4, S_RET(rdec), S_NARG(arg, "domain"), S_NARG(arg, "type"), S_NARG(arg, "protocol"), S_NARG(ptr, "sv")), + SYSCALL(setsockopt, 5, S_RET(rdec), S_UARG(fd), S_NARG(arg, "level"), S_NARG(arg, "optname"), S_NARG(ptr, "optval"), S_NARG(arg, "optlen")), + SYSCALL(getsockopt, 5, S_RET(rdec), S_UARG(fd), S_NARG(arg, "level"), S_NARG(arg, "optname"), S_NARG(ptr, "optval"), S_NARG(ptr, "optlen")), + SYSCALL(clone, 5, S_RET(rdec), S_NARG(clone_flags, "flags"), S_NARG(ptr, "child_stack"), S_NARG(ptr, "ptid"), S_NARG(ptr, "ctid"), 
S_NARG(ptr, "newtls")), + SYSCALL(fork, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(vfork, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(execve, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "argv"), S_NARG(ptr, "envp")), + SYSCALL(exit, 1, S_RET(rnone), S_NARG(dec, "status")), + SYSCALL(wait4, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "stat_addr"), S_NARG(arg, "options"), S_NARG(ptr, "rusage")), + SYSCALL(kill, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(signum, "sig")), + SYSCALL(uname, 1, S_RET(rdec), S_NARG(ptr, "buf")), + SYSCALL(semget, 3, S_RET(rdec), S_NARG(arg, "key"), S_NARG(dec, "nsems"), S_NARG(arg, "semflg")), + SYSCALL(semop, 3, S_RET(rdec), S_NARG(dec, "semid"), S_NARG(ptr, "sops"), S_NARG(arg, "nsops")), + SYSCALL(semctl, 4, S_RET(rdec), S_NARG(dec, "semid"), S_NARG(dec, "semnum"), S_NARG(arg, "cmd"), S_NARG(arg, "arg")), + SYSCALL(shmdt, 1, S_RET(rdec), S_NARG(ptr, "shmaddr")), + SYSCALL(msgget, 2, S_RET(rdec), S_NARG(arg, "key"), S_NARG(arg, "msflg")), + SYSCALL(msgsnd, 4, S_RET(rdec), S_NARG(arg, "msqid"), S_NARG(ptr, "msgp"), S_NARG(dec, "msgsz"), S_NARG(arg, "msflg")), + SYSCALL(msgrcv, 5, S_RET(rdec), S_NARG(arg, "msqid"), S_NARG(ptr, "msgp"), S_NARG(dec, "msgsz"), S_NARG(arg, "msgtyp"), S_NARG(arg, "msflg")), + SYSCALL(msgctl, 3, S_RET(rdec), S_NARG(arg, "msqid"), S_NARG(arg, "cmd"), S_NARG(ptr, "buf")), + SYSCALL(fcntl, 3, S_RET(rdec), S_UARG(fd), S_NARG(arg, "cmd"), S_NARG(arg, "arg")), + SYSCALL(flock, 2, S_RET(rdec), S_UARG(fd), S_NARG(arg, "cmd")), + SYSCALL(fsync, 1, S_RET(rdec), S_UARG(fd)), + SYSCALL(fdatasync, 1, S_RET(rdec), S_UARG(fd)), /* nargs must match the single descriptor (fd) */ + SYSCALL(truncate, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, "length")), + SYSCALL(ftruncate, 2, S_RET(rdec), S_UARG(fd), S_NARG(offset, "length")), + SYSCALL(getdents, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "dirent"), S_NARG(arg, "count")), + SYSCALL(getcwd, 2, S_RET(rdec), S_NARG(ptr, "buf"), S_NARG(dec, "size")), + SYSCALL(chdir, 1, S_RET(rdec), S_NARG(cstr, "pathname")), +
SYSCALL(fchdir, 1, S_RET(rdec), S_UARG(fd)), + SYSCALL(rename, 2, S_RET(rdec), S_NARG(cstr, "oldpath"), S_NARG(cstr, "newpath")), + SYSCALL(mkdir, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(rmdir, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(creat, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(link, 2, S_RET(rdec), S_NARG(cstr, "oldpath"), S_NARG(cstr, "newpath")), + SYSCALL(unlink, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(symlink, 2, S_RET(rdec), S_NARG(cstr, "target"), S_NARG(cstr, "linkpath")), + SYSCALL(readlink, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsiz")), /* nargs must cover all three descriptors, including bufsiz */ + SYSCALL(chmod, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(fchmod, 2, S_RET(rdec), S_UARG(fd), S_NARG(octal_mode, "mode")), + SYSCALL(chown, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(dec, "user"), S_NARG(dec, "group")), + SYSCALL(fchown, 3, S_RET(rdec), S_UARG(fd), S_NARG(dec, "user"), S_NARG(dec, "group")), + SYSCALL(lchown, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(dec, "user"), S_NARG(dec, "group")), + SYSCALL(umask, 1, S_RET(rdec), S_NARG(arg, "mask")), + SYSCALL(gettimeofday, 2, S_RET(rdec), S_NARG(ptr, "tv"), S_NARG(ptr, "tz")), + SYSCALL(getrlimit, 2, S_RET(rdec), S_NARG(arg, "resource"), S_NARG(ptr, "rlim")), + SYSCALL(getrusage, 2, S_RET(rdec), S_NARG(arg, "who"), S_NARG(ptr, "ru")), + SYSCALL(sysinfo, 1, S_RET(rdec), S_NARG(ptr, "info")), + SYSCALL(times, 1, S_RET(rdec), S_NARG(ptr, "tbuf")), + SYSCALL(ptrace, 4, S_RET(rdec), S_NARG(arg, "request"), S_NARG(dec, "pid"), S_NARG(ptr, "addr"), S_NARG(ptr, "data")), + SYSCALL(getuid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(syslog, 3, S_RET(rdec), S_NARG(arg, "type"), S_NARG(ptr, "buf"), S_NARG(arg, "length")), + SYSCALL(getgid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setuid, 1, S_RET(rdec), S_NARG(dec, "uid")), + SYSCALL(setgid, 1, S_RET(rdec), S_NARG(dec, "gid")), +
SYSCALL(geteuid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(getegid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setpgid, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(dec, "pgid")), + SYSCALL(getppid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(getpgrp, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setsid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(setreuid, 2, S_RET(rdec), S_NARG(dec, "ruid"), S_NARG(dec, "euid")), + SYSCALL(setregid, 2, S_RET(rdec), S_NARG(dec, "rgid"), S_NARG(dec, "egid")), + SYSCALL(getgroups, 2, S_RET(rdec), S_NARG(arg, "gidsetsize"), S_NARG(ptr, "grouplist")), + SYSCALL(setgroups, 2, S_RET(rdec), S_NARG(arg, "gidsetsize"), S_NARG(ptr, "grouplist")), + SYSCALL(setresuid, 3, S_RET(rdec), S_NARG(dec, "ruid"), S_NARG(dec, "euid"), S_NARG(dec, "suid")), + SYSCALL(getresuid, 3, S_RET(rdec), S_NARG(ptr, "ruid"), S_NARG(ptr, "euid"), S_NARG(ptr, "suid")), + SYSCALL(setresgid, 3, S_RET(rdec), S_NARG(dec, "rgid"), S_NARG(dec, "egid"), S_NARG(dec, "sgid")), + SYSCALL(getresgid, 3, S_RET(rdec), S_NARG(ptr, "rgid"), S_NARG(ptr, "egid"), S_NARG(ptr, "sgid")), + SYSCALL(getpgid, 1, S_RET(rdec), S_NARG(dec, "pid")), + SYSCALL(setfsuid, 1, S_RET(rdec), S_NARG(dec, "uid")), + SYSCALL(setfsgid, 1, S_RET(rdec), S_NARG(dec, "gid")), + SYSCALL(getsid, 1, S_RET(rdec), S_NARG(dec, "pid")), + SYSCALL(capget, 2, S_RET(rdec), S_NARG(ptr, "header"), S_NARG(ptr, "datap")), + SYSCALL(capset, 2, S_RET(rdec), S_NARG(ptr, "header"), S_NARG(ptr, "datap")), + SYSCALL(rt_sigpending, 2, S_RET(rdec), S_NARG(ptr, "set"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigtimedwait, 4, S_RET(rdec), S_NARG(ptr, "uthese"), S_NARG(ptr, "uinfo"), S_NARG(ptr, "uts"), S_NARG(dec, "sigsetsize")), + SYSCALL(rt_sigqueueinfo, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(signum, "sig"), S_NARG(ptr, "uinfo")), /* nargs must match the three descriptors: pid, sig, uinfo */ + SYSCALL(rt_sigsuspend, 2, S_RET(rdec), S_NARG(ptr, "unewset"), S_NARG(dec, "sigsetsize")), + SYSCALL(sigaltstack, 2, S_RET(rdec), S_NARG(ptr, "ss"), S_NARG(ptr, "old_ss")), + SYSCALL(utime, 2, S_RET(rdec), S_NARG(cstr,
"filename"), S_NARG(ptr, "times")), + SYSCALL(mknod, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode"), S_NARG(arg, "dev")), + SYSCALL(uselib, 1, S_RET(rdec), S_NARG(cstr, "library")), + SYSCALL(personality, 1, S_RET(rdec), S_NARG(arg, "personality")), + SYSCALL(ustat, 2, S_RET(rdec), S_NARG(arg, "dev"), S_NARG(ptr, "ubuf")), + SYSCALL(statfs, 2, S_RET(rdec), S_NARG(cstr, "path"), S_NARG(ptr, "buf")), + SYSCALL(fstatfs, 2, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "buf")), + SYSCALL(sysfs, 3, S_RET(rdec), S_NARG(arg, "option"), S_NARG(ptr, "arg1"), S_NARG(ptr, "arg2")), + SYSCALL(getpriority, 2, S_RET(rdec), S_NARG(arg, "which"), S_NARG(arg, "who")), + SYSCALL(setpriority, 3, S_RET(rdec), S_NARG(arg, "which"), S_NARG(arg, "who"), S_NARG(arg, "prio")), + SYSCALL(sched_setparam, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "param")), + SYSCALL(sched_getparam, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "param")), + SYSCALL(sched_setscheduler, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "policy"), S_NARG(ptr, "param")), + SYSCALL(sched_getscheduler, 1, S_RET(rdec), S_NARG(dec, "pid")), + SYSCALL(sched_get_priority_max, 1, S_RET(rdec), S_NARG(arg, "policy")), + SYSCALL(sched_get_priority_min, 1, S_RET(rdec), S_NARG(arg, "policy")), + SYSCALL(sched_rr_get_interval, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "interval")), + SYSCALL(mlock, 2, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length")), + SYSCALL(munlock, 2, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length")), + SYSCALL(mlockall, 1, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(munlockall, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(vhangup, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(modify_ldt, 3, S_RET(rdec), S_NARG(arg, "func"), S_NARG(ptr, "ptr"), S_NARG(arg, "bytecount")), + SYSCALL(pivot_root, 2, S_RET(rdec), S_NARG(cstr, "new_root"), S_NARG(cstr, "put_old")), + SYSCALL(_sysctl, 1, S_RET(rdec), S_NARG(ptr, "args")), + SYSCALL(prctl, 5, S_RET(rdec), S_NARG(arg, "option"), 
S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4"), S_NARG(arg, "arg5")), + SYSCALL(arch_prctl, 2, S_RET(rdec), S_NARG(arg, "code"), S_NARG(arg, "addr")), + SYSCALL(adjtimex, 1, S_RET(rdec), S_NARG(ptr, "txc_p")), + SYSCALL(setrlimit, 2, S_RET(rdec), S_NARG(arg, "resource"), S_NARG(ptr, "rlim")), + SYSCALL(chroot, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(sync, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(acct, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(settimeofday, 2, S_RET(rdec), S_NARG(ptr, "tv"), S_NARG(ptr, "tz")), + SYSCALL(mount, 5, S_RET(rdec), S_NARG(cstr, "dev_name"), S_NARG(cstr, "dir_name"), S_NARG(cstr, "type"), S_NARG(arg, "flags"), S_NARG(ptr, "data")), + SYSCALL(umount2, 2, S_RET(rdec), S_NARG(cstr, "target"), S_NARG(arg, "flags")), + SYSCALL(swapon, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, "swap_flags")), + SYSCALL(swapoff, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(reboot, 4, S_RET(rdec), S_NARG(arg, "magic1"), S_NARG(arg, "magic2"), S_NARG(arg, "cmd"), S_NARG(ptr, "arg")), + SYSCALL(sethostname, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "length")), + SYSCALL(setdomainname, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "length")), + SYSCALL(iopl, 1, S_RET(rdec), S_NARG(arg, "level")), + SYSCALL(ioperm, 3, S_RET(rdec), S_NARG(arg, "from"), S_NARG(arg, "num"), S_NARG(arg, "on")), + SYSCALL(create_module, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "size")), + SYSCALL(init_module, 3, S_RET(rdec), S_NARG(ptr, "module_image"), S_NARG(dec, "length"), S_NARG(cstr, "param_values")), + SYSCALL(delete_module, 2, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "flags")), + SYSCALL(get_kernel_syms, 1, S_RET(rdec), S_NARG(ptr, "table")), + SYSCALL(query_module, 5, S_RET(rdec), S_NARG(cstr, "name"), S_NARG(arg, "which"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsize"), S_NARG(ptr, "ret")), + SYSCALL(quotactl, 4, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(cstr, "special"), S_NARG(arg, "id"), S_NARG(ptr, 
"addr")), + SYSCALL(nfsservctl, 3, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(ptr, "argp"), S_NARG(ptr, "resp")), + SYSCALL(getpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4")), + SYSCALL(putpmsg, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4")), + SYSCALL(afs_syscall, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4")), + SYSCALL(tuxcall, 3, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2")), + SYSCALL(security, 3, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2")), + SYSCALL(gettid, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(readahead, 3, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_NARG(arg, "count")), + SYSCALL(setxattr, 5, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(lsetxattr, 5, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(fsetxattr, 5, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(getxattr, 4, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size")), + SYSCALL(lgetxattr, 4, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size")), + SYSCALL(fgetxattr, 4, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname"), S_NARG(ptr, "value"), S_NARG(dec, "size")), + SYSCALL(listxattr, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "list"), S_NARG(dec, "size")), + SYSCALL(llistxattr, 3, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(ptr, "list"), S_NARG(dec, "size")), + SYSCALL(flistxattr, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "list"), 
S_NARG(dec, "size")), + SYSCALL(removexattr, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname")), + SYSCALL(lremovexattr, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(cstr, "pathname")), + SYSCALL(fremovexattr, 2, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname")), + SYSCALL(tkill, 2, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(signum, "sig")), + SYSCALL(time, 1, S_RET(rdec), S_NARG(ptr, "tloc")), + SYSCALL(futex, 6, S_RET(rdec), S_NARG(ptr, "uaddr"), S_NARG(arg, "op"), S_NARG(arg, "val"), S_NARG(ptr, "utime"), S_NARG(ptr, "uaddr2"), S_NARG(arg, "val3")), + SYSCALL(sched_setaffinity, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "length"), S_NARG(ptr, "mask")), + SYSCALL(sched_getaffinity, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "length"), S_NARG(ptr, "mask")), + SYSCALL(set_thread_area, 1, S_RET(rdec), S_NARG(ptr, "u_info")), + SYSCALL(io_setup, 2, S_RET(rdec), S_NARG(dec, "nr_reqs"), S_NARG(ptr, "ctx")), + SYSCALL(io_destroy, 1, S_RET(rdec), S_NARG(ptr, "ctx")), + SYSCALL(io_getevents, 5, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(dec, "min_nr"), S_NARG(dec, "nr"), S_NARG(ptr, "events"), S_NARG(ptr, "timeout")), + SYSCALL(io_submit, 3, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(dec, "nr"), S_NARG(ptr, "iocbpp")), + SYSCALL(io_cancel, 3, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(ptr, "iocb"), S_NARG(ptr, "result")), + SYSCALL(get_thread_area, 1, S_RET(rdec), S_NARG(ptr, "u_info")), + SYSCALL(lookup_dcookie, 3, S_RET(rdec), S_NARG(arg, "cookie64"), S_NARG(ptr, "buf"), S_NARG(dec, "length")), + SYSCALL(epoll_create, 1, S_RET(rdec), S_NARG(arg, "size")), /* nargs must match the single descriptor (size) */ + SYSCALL(epoll_ctl_old, 4, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3")), + SYSCALL(epoll_wait_old, 4, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3")), + SYSCALL(remap_file_pages, 5, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "size"), S_NARG(mmap_prot, "prot"), S_NARG(dec, "pgoff"),
S_NARG(arg, "flags")), + SYSCALL(getdents64, 3, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "dirent"), S_NARG(arg, "count")), + SYSCALL(set_tid_address, 1, S_RET(rdec), S_NARG(ptr, "tidptr")), + SYSCALL(restart_syscall, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(semtimedop, 4, S_RET(rdec), S_NARG(dec, "semid"), S_NARG(ptr, "sops"), S_NARG(arg, "nsops"), S_NARG(ptr, "timeout")), + SYSCALL(fadvise64, 4, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_NARG(dec, "length"), S_NARG(arg, "advice")), + SYSCALL(timer_create, 3, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "timer_event_spec"), S_NARG(ptr, "created_timer_id")), + SYSCALL(timer_settime, 4, S_RET(rdec), S_NARG(arg, "timer_id"), S_NARG(arg, "flags"), S_NARG(ptr, "new_setting"), S_NARG(ptr, "old_setting")), + SYSCALL(timer_gettime, 2, S_RET(rdec), S_NARG(arg, "timer_id"), S_NARG(ptr, "setting")), + SYSCALL(timer_getoverrun, 1, S_RET(rdec), S_NARG(arg, "timer_id")), + SYSCALL(timer_delete, 1, S_RET(rdec), S_NARG(arg, "timer_id")), + SYSCALL(clock_settime, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tp")), + SYSCALL(clock_gettime, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tp")), + SYSCALL(clock_getres, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tp")), + SYSCALL(clock_nanosleep, 4, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(arg, "flags"), S_NARG(ptr, "rqtp"), S_NARG(ptr, "rmtp")), + SYSCALL(exit_group, 1, S_RET(rnone), S_NARG(dec, "status")), + SYSCALL(epoll_wait, 4, S_RET(rdec), S_NARG(dec, "epfd"), S_NARG(ptr, "events"), S_NARG(dec, "maxevents"), S_NARG(dec32, "timeout")), + SYSCALL(epoll_ctl, 4, S_RET(rdec), S_NARG(dec, "epfd"), S_NARG(arg, "op"), S_UARG(fd), S_NARG(ptr, "event")), + SYSCALL(tgkill, 3, S_RET(rdec), S_NARG(arg, "tgid"), S_NARG(dec, "pid"), S_NARG(signum, "sig")), + SYSCALL(utimes, 2, S_RET(rdec), S_NARG(cstr, "filename"), S_NARG(ptr, "utimes")), + SYSCALL(vserver, 5, S_RET(rdec), S_NARG(arg, "arg0"), S_NARG(arg, "arg1"), S_NARG(arg, "arg2"), S_NARG(arg, 
"arg3"), S_NARG(arg, "arg4")), + SYSCALL(mbind, 6, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(octal_mode, "mode"), S_NARG(ptr, "nmask"), S_NARG(arg, "maxnode"), S_NARG(arg, "flags")), + SYSCALL(set_mempolicy, 3, S_RET(rdec), S_NARG(octal_mode, "mode"), S_NARG(ptr, "nmask"), S_NARG(arg, "maxnode")), + SYSCALL(get_mempolicy, 5, S_RET(rdec), S_NARG(ptr, "policy"), S_NARG(ptr, "nmask"), S_NARG(arg, "maxnode"), S_NARG(ptr, "addr"), S_NARG(arg, "flags")), + SYSCALL(mq_open, 4, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(open_flags, "oflag"), S_NARG(octal_mode, "mode"), S_NARG(ptr, "attr")), + SYSCALL(mq_unlink, 1, S_RET(rdec), S_NARG(cstr, "pathname")), + SYSCALL(mq_timedsend, 5, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(cstr, "msg_ptr"), S_NARG(dec, "msg_len"), S_NARG(arg, "msg_prio"), S_NARG(ptr, "abs_timeout")), + SYSCALL(mq_timedreceive, 5, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(ptr, "msg_ptr"), S_NARG(dec, "msg_len"), S_NARG(ptr, "msg_prio"), S_NARG(ptr, "abs_timeout")), + SYSCALL(mq_notify, 2, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(ptr, "notification")), + SYSCALL(mq_getsetattr, 3, S_RET(rdec), S_NARG(arg, "mqdes"), S_NARG(ptr, "mqstat"), S_NARG(ptr, "omqstat")), + SYSCALL(kexec_load, 4, S_RET(rdec), S_NARG(arg, "entry"), S_NARG(arg, "nr_segments"), S_NARG(ptr, "segments"), S_NARG(arg, "flags")), + SYSCALL(waitid, 5, S_RET(rdec), S_NARG(arg, "which"), S_NARG(dec, "pid"), S_NARG(ptr, "infop"), S_NARG(arg, "options"), S_NARG(ptr, "ru")), + SYSCALL(add_key, 5, S_RET(rdec), S_NARG(cstr, "type"), S_NARG(cstr, "description"), S_NARG(ptr, "payload"), S_NARG(dec, "plen"), S_NARG(arg, "destringid")), + SYSCALL(request_key, 4, S_RET(rdec), S_NARG(cstr, "type"), S_NARG(cstr, "description"), S_NARG(cstr, "callout_info"), S_NARG(arg, "destringid")), + SYSCALL(keyctl, 5, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(arg, "arg2"), S_NARG(arg, "arg3"), S_NARG(arg, "arg4"), S_NARG(arg, "arg5")), + SYSCALL(ioprio_set, 3, S_RET(rdec), S_NARG(arg, "which"), 
S_NARG(arg, "who"), S_NARG(dec, "ioprio")), + SYSCALL(ioprio_get, 2, S_RET(rdec), S_NARG(arg, "which"), S_NARG(arg, "who")), + SYSCALL(inotify_init, 0, S_RET(rdec), S_NOARGS()), + SYSCALL(inotify_add_watch, 3, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "pathname"), S_NARG(arg, "mask")), + SYSCALL(inotify_rm_watch, 2, S_RET(rdec), S_UARG(fd), S_NARG(dec, "wd")), + SYSCALL(migrate_pages, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "maxnode"), S_NARG(ptr, "from"), S_NARG(ptr, "to")), + SYSCALL(openat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(open_flags, "flags")), + SYSCALL(mkdirat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(mknodat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "filename"), S_NARG(octal_mode, "mode"), S_NARG(arg, "dev")), + SYSCALL(fchownat, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(dec, "user"), S_NARG(dec, "group"), S_NARG(arg, "flag")), + SYSCALL(futimesat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "utimes")), + SYSCALL(newfstatat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "statbuf"), S_NARG(arg, "flag")), + SYSCALL(unlinkat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(arg, "flag")), + SYSCALL(renameat, 4, S_RET(rdec), S_NARG(atfd, "olddfd"), S_NARG(cstr, "oldname"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newname")), + SYSCALL(linkat, 5, S_RET(rdec), S_NARG(atfd, "olddfd"), S_NARG(cstr, "oldpath"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newpath"), S_NARG(arg, "flags")), + SYSCALL(symlinkat, 3, S_RET(rdec), S_NARG(cstr, "oldname"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newname")), + SYSCALL(readlinkat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "buf"), S_NARG(arg, "bufsiz")), + SYSCALL(fchmodat, 3, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "filename"), S_NARG(octal_mode, "mode")), + SYSCALL(faccessat, 3, 
S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(octal_mode, "mode")), + SYSCALL(pselect6, 6, S_RET(rdec), S_NARG(dec, "nfds"), S_NARG(ptr, "readfds"), S_NARG(ptr, "writefds"), S_NARG(ptr, "exceptfds"), S_NARG(ptr, "timeval"), S_NARG(ptr, "sigmask")), + SYSCALL(ppoll, 5, S_RET(rdec), S_NARG(ptr, "fds"), S_NARG(dec, "nfds"), S_NARG(ptr, "tmo_p"), S_NARG(ptr, "sigmask"), S_NARG(dec, "sigsetsize")), + SYSCALL(unshare, 1, S_RET(rdec), S_NARG(arg, "unshare_flags")), + SYSCALL(set_robust_list, 2, S_RET(rdec), S_NARG(ptr, "head"), S_NARG(dec, "length")), + SYSCALL(get_robust_list, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "head_ptr"), S_NARG(ptr, "len_ptr")), + SYSCALL(splice, 6, S_RET(rdec), S_NARG(dec, "fd_in"), S_NARG(ptr, "off_in"), S_NARG(dec, "fd_out"), S_NARG(ptr, "off_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(tee, 4, S_RET(rdec), S_NARG(dec, "fd_in"), S_NARG(dec, "fd_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(sync_file_range, 4, S_RET(rdec), S_UARG(fd), S_UARG(offset), S_NARG(offset, "nbytes"), S_NARG(arg, "flags")), + SYSCALL(vmsplice, 4, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "nr_segs"), S_NARG(arg, "flags")), + SYSCALL(move_pages, 6, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "nr_pages"), S_NARG(ptr, "pages"), S_NARG(ptr, "nodes"), S_NARG(ptr, "status"), S_NARG(arg, "flags")), + SYSCALL(utimensat, 4, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "utimes"), S_NARG(arg, "flags")), + SYSCALL(epoll_pwait, 6, S_RET(rdec), S_NARG(fd, "epfd"), S_NARG(ptr, "events"), S_NARG(dec, "maxevents"), S_NARG(dec, "timeout"), S_NARG(ptr, "sigmask"), S_NARG(dec, "sigsetsize")), + SYSCALL(signalfd, 3, S_RET(rdec), S_NARG(dec, "ufd"), S_NARG(ptr, "user_mask"), S_NARG(dec, "sizemask")), + SYSCALL(timerfd_create, 2, S_RET(rdec), S_NARG(dec, "clockid"), S_NARG(arg, "flags")), + SYSCALL(eventfd, 1, S_RET(rdec), S_NARG(arg, "count")), + SYSCALL(fallocate, 4, S_RET(rdec), 
S_UARG(fd), S_NARG(octal_mode, "mode"), S_UARG(offset), S_NARG(offset, "length")), + SYSCALL(timerfd_settime, 4, S_RET(rdec), S_NARG(fd, "ufd"), S_NARG(arg, "flags"), S_NARG(ptr, "utmr"), S_NARG(ptr, "otmr")), + SYSCALL(timerfd_gettime, 2, S_RET(rdec), S_NARG(fd, "ufd"), S_NARG(ptr, "otmr")), + SYSCALL(accept4, 4, S_RET(rdec), S_NARG(fd, "sockfd"), S_NARG(ptr, "addr"), S_NARG(ptr, "addrlen"), S_NARG(arg, "flags")), + SYSCALL(signalfd4, 4, S_RET(rdec), S_NARG(fd, "ufd"), S_NARG(ptr, "user_mask"), S_NARG(dec, "sizemask"), S_NARG(arg, "flags")), + SYSCALL(eventfd2, 2, S_RET(rdec), S_NARG(arg, "count"), S_NARG(arg, "flags")), + SYSCALL(epoll_create1, 1, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(dup3, 3, S_RET(rdec), S_NARG(fd, "oldfd"), S_NARG(fd, "newfd"), S_NARG(arg, "flags")), + SYSCALL(pipe2, 2, S_RET(rdec), S_NARG(ptr, "fildes"), S_NARG(arg, "flags")), + SYSCALL(inotify_init1, 1, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(preadv, 5, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h")), + SYSCALL(pwritev, 5, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(dec, "iovcnt"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h")), + SYSCALL(rt_tgsigqueueinfo, 4, S_RET(rdec), S_NARG(arg, "tgid"), S_NARG(arg, "pid"), S_NARG(signum, "sig"), S_NARG(ptr, "uinfo")), + SYSCALL(perf_event_open, 5, S_RET(rdec), S_NARG(ptr, "attr_uptr"), S_NARG(dec, "pid"), S_NARG(dec, "cpu"), S_NARG(fd, "group_fd"), S_NARG(arg, "flags")), + SYSCALL(recvmmsg, 5, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "msg"), S_NARG(dec, "vlen"), S_NARG(arg, "flags"), S_NARG(ptr, "timeout")), + SYSCALL(fanotify_init, 2, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(arg, "event_f_flags")), + SYSCALL(fanotify_mark, 5, S_RET(rdec), S_NARG(fd, "fanotify_fd"), S_NARG(arg, "flags"), S_NARG(arg, "mask"), S_UARG(fd), S_NARG(cstr, "pathname")), + SYSCALL(prlimit64, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(arg, "resource"), S_NARG(ptr, "new_rlim"), S_NARG(ptr, 
"old_rlim")), + SYSCALL(name_to_handle_at, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "handle"), S_NARG(ptr, "mnt_id"), S_NARG(arg, "flag")), + SYSCALL(open_by_handle_at, 3, S_RET(rdec), S_NARG(fd, "mountdirfd"), S_NARG(ptr, "handle"), S_NARG(arg, "flags")), + SYSCALL(clock_adjtime, 2, S_RET(rdec), S_NARG(arg, "which_clock"), S_NARG(ptr, "tx")), + SYSCALL(syncfs, 2, S_RET(rdec), S_UARG(fd)), + SYSCALL(sendmmsg, 4, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "msg"), S_NARG(dec, "vlen"), S_NARG(arg, "flags")), + SYSCALL(setns, 2, S_RET(rdec), S_UARG(fd), S_NARG(arg, "nstype")), + SYSCALL(getcpu, 3, S_RET(rdec), S_NARG(ptr, "cpu"), S_NARG(ptr, "node"), S_NARG(ptr, "cache")), + SYSCALL(process_vm_readv, 6, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "local_iov"), S_NARG(dec, "liovcnt"), S_NARG(ptr, "remote_iov"), S_NARG(dec, "riovcnt"), S_NARG(arg, "flags")), + SYSCALL(process_vm_writev, 6, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "local_iov"), S_NARG(dec, "liovcnt"), S_NARG(ptr, "remote_iov"), S_NARG(dec, "riovcnt"), S_NARG(arg, "flags")), + SYSCALL(kcmp, 5, S_RET(rdec), S_NARG(arg, "pid1"), S_NARG(arg, "pid2"), S_NARG(arg, "type"), S_NARG(arg, "idx1"), S_NARG(arg, "idx2")), + SYSCALL(finit_module, 3, S_RET(rdec), S_UARG(fd), S_NARG(cstr, "param_values"), S_NARG(arg, "flags")), + SYSCALL(sched_setattr, 3, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "attr"), S_NARG(arg, "flags")), + SYSCALL(sched_getattr, 4, S_RET(rdec), S_NARG(dec, "pid"), S_NARG(ptr, "attr"), S_NARG(dec, "size"), S_NARG(arg, "flags")), + SYSCALL(renameat2, 5, S_RET(rdec), S_NARG(atfd, "olddfd"), S_NARG(cstr, "oldpath"), S_NARG(atfd, "newdfd"), S_NARG(cstr, "newpath"), S_NARG(arg, "flags")), + SYSCALL(seccomp, 3, S_RET(rdec), S_NARG(arg, "op"), S_NARG(arg, "flags"), S_NARG(ptr, "uargs")), + SYSCALL(getrandom, 3, S_RET(rdec), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_NARG(arg, "flags")), + SYSCALL(memfd_create, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, 
"flags")), + SYSCALL(kexec_file_load, 5, S_RET(rdec), S_NARG(fd, "kernel_fd"), S_NARG(fd, "initrd_fd"), S_NARG(arg, "cmdline_len"), S_NARG(cstr, "cmdline"), S_NARG(arg, "flags")), + SYSCALL(bpf, 2, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(ptr, "attr"), S_NARG(arg, "size")), + SYSCALL(execveat, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "argv"), S_NARG(ptr, "envp"), S_NARG(arg, "flags")), + SYSCALL(userfaultfd, 2, S_RET(rdec), S_NARG(arg, "flags")), + SYSCALL(membarrier, 2, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(arg, "flags")), + SYSCALL(mlock2, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(copy_file_range, 6, S_RET(rdec), S_NARG(fd, "fd_in"), S_NARG(ptr, "off_in"), S_NARG(fd, "fd_out"), S_NARG(ptr, "off_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + SYSCALL(preadv2, 6, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "vlen"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h"), S_NARG(arg, "flags")), + SYSCALL(pwritev2, 6, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "vlen"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h"), S_NARG(arg, "flags")), + SYSCALL(pkey_mprotect, 4, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot"), S_NARG(dec, "pkey")), + SYSCALL(pkey_alloc, 2, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(arg, "init_val")), + SYSCALL(pkey_free, 1, S_RET(rdec), S_NARG(dec, "pkey")), + SYSCALL(statx, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(arg, "flags"), S_NARG(arg, "mask"), S_NARG(ptr, "buffer")), + SYSCALL(io_pgetevents, 6, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(dec, "min_nr"), S_NARG(dec, "nr"), S_NARG(ptr, "events"), S_NARG(ptr, "timeout"), S_NARG(ptr, "sig")), + SYSCALL(rseq, 4, S_RET(rdec), S_NARG(ptr, "rseq"), S_NARG(dec, "rseq_len"), S_NARG(arg, "flags"), S_NARG(signum, "sig")) +}; + + +static const struct syscall_info unknown_syscall = { + .s_name = "unknown_syscall", + .s_nargs = 
MAX_SYSCALL_ARGS, + .s_return_type = S_RET(rdec), + .s_args = { + S_NARG(arg, "arg0"), + S_NARG(arg, "arg1"), + S_NARG(arg, "arg2"), + S_NARG(arg, "arg3"), + S_NARG(arg, "arg4"), + S_NARG(arg, "arg5"), + } +}; + +/* Alternate descriptor for "open" with three arguments (pathname, flags, mode); get_syscall_info() returns it when requires_mode_arg() accepts the flags argument. */ +static const struct syscall_info open_with_o_creat = { + .s_name = "open", + .s_nargs = 3, + .s_return_type = S_RET(rdec), + .s_args = { + S_NARG(cstr, "pathname"), + S_NARG(open_flags, "flags"), + S_NARG(octal_mode, "mode") + } +}; + +/* Alternate descriptor for "openat" with four arguments (dfd, pathname, flags, mode); get_syscall_info() returns it when requires_mode_arg() accepts the flags argument. */ +static const struct syscall_info openat_with_o_creat = { + .s_name = "openat", + .s_nargs = 4, + .s_return_type = S_RET(rdec), + .s_args = { + S_NARG(atfd, "dfd"), + S_NARG(cstr, "pathname"), + S_NARG(open_flags, "flags"), + S_NARG(octal_mode, "mode") + } +}; + +/* Return true if 'flags' has O_CREAT set or, where O_TMPFILE is defined, has every O_TMPFILE bit set; return false otherwise. */ +static bool +requires_mode_arg(int flags) { + + if((flags & O_CREAT) == O_CREAT) { + return true; + } + +#ifdef O_TMPFILE + if ((flags & O_TMPFILE) == O_TMPFILE) { + return true; + } +#endif + + return false; +} + +#include + +/** + * get_syscall_info - Return a syscall descriptor + * + * This function returns a pointer to a syscall_info structure that + * appropriately describes the system call identified by 'syscall_number'. 
+ */ +const struct syscall_info * +get_syscall_info(const long syscall_number, + const long* argv) { + + if(syscall_number < 0 || + syscall_number >= (long) ARRAY_SIZE(syscall_table)) { + return &unknown_syscall; + } + + if(syscall_table[syscall_number].s_name == NULL) { + return &unknown_syscall; + } + + if(argv == NULL) { + return &syscall_table[syscall_number]; + } + + if(syscall_number == SYS_open && requires_mode_arg(argv[1])) { + return &open_with_o_creat; + } + + if(syscall_number == SYS_openat && requires_mode_arg(argv[2])) { + return &openat_with_o_creat; + } + + return &syscall_table[syscall_number]; +} + +#define RETURN_TYPE(scinfo) \ + (scinfo)->s_return_type.r_type + +bool +syscall_never_returns(long syscall_number) { + return RETURN_TYPE(get_syscall_info(syscall_number, NULL)) == rnone; +} + + + +#undef SYSCALL +#undef S_NOARGS +#undef S_UARG +#undef S_NARG +#undef S_RET +#undef ARRAY_SIZE diff --git a/src/daemon/main.cpp b/src/daemon/main.cpp index 39a2362a4..45410e469 100644 --- a/src/daemon/main.cpp +++ b/src/daemon/main.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -262,7 +263,7 @@ void shutdown_handler(int dummy) { void initialize_loggers() { std::string path = DEFAULT_DAEMON_LOG_PATH; // Try to get log path from env variable - std::string env_path_key = ENV_PREFIX; + std::string env_path_key = DAEMON_ENV_PREFIX; env_path_key += "DAEMON_LOG_PATH"; char* env_path = getenv(env_path_key.c_str()); if (env_path != nullptr) { @@ -271,7 +272,7 @@ void initialize_loggers() { spdlog::level::level_enum level = get_spdlog_level(DEFAULT_DAEMON_LOG_LEVEL); // Try to get log path from env variable - std::string env_level_key = ENV_PREFIX; + std::string env_level_key = DAEMON_ENV_PREFIX; env_level_key += "LOG_LEVEL"; char* env_level = getenv(env_level_key.c_str()); if (env_level != nullptr) { @@ -350,11 +351,8 @@ int main(int argc, const char* argv[]) { if (vm.count("hosts-file")) { hosts_file = vm["hosts-file"].as(); 
} else { - try { - hosts_file = gkfs::get_env_own("HOSTS_FILE"); - } catch (const exception& e) { - hosts_file = DEFAULT_HOSTS_FILE; - } + hosts_file = + gkfs::env::get_var(gkfs::env::HOSTS_FILE, DEFAULT_HOSTS_FILE); } ADAFS_DATA->hosts_file(hosts_file); diff --git a/src/global/env_util.cpp b/src/global/env_util.cpp index 8cbf50811..60078c6eb 100644 --- a/src/global/env_util.cpp +++ b/src/global/env_util.cpp @@ -11,27 +11,20 @@ SPDX-License-Identifier: MIT */ -#include -#include +#include #include -#include - +#include namespace gkfs { +namespace env { -using namespace std; +std::string +get_var(const std::string& name, + const std::string& default_value) { -string get_env(const string& env_name) { - char* env_value = secure_getenv(env_name.c_str()); - if (env_value == nullptr) { - throw runtime_error("Environment variable not set: " + env_name); - } - return env_value; + const char* const val = ::secure_getenv(name.c_str()); + return val != nullptr ? std::string(val) : default_value; } -string get_env_own(const string& env_name) { - string env_key = ENV_PREFIX + env_name; - return get_env(env_key); -} - -} +} // namespace env +} // namespace gkfs -- GitLab From 5de6a762189cdb970cac4ea6916fc63a53f6a46f Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 11:51:16 +0100 Subject: [PATCH 40/71] USE_BITSET_FOR_INTERNAL_FDS is now the default --- CMakeLists.txt | 25 +++++++------------------ include/client/preload_context.hpp | 7 ------- src/client/preload_context.cpp | 3 --- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ae28bd595..87409bd21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,25 +122,14 @@ set(MAX_INTERNAL_FDS 256 CACHE STRING "Number of file descriptors reserved for i add_definitions(-DMAX_INTERNAL_FDS=${MAX_INTERNAL_FDS}) message(STATUS "File descriptors reserved for internal use: ${MAX_INTERNAL_FDS}") -option(USE_BITSET_FOR_INTERNAL_FDS "Use std::bitset to track internal fds" 
ON) -if(USE_BITSET_FOR_INTERNAL_FDS) - add_definitions(-DUSE_BITSET_FOR_INTERNAL_FDS) - execute_process(COMMAND getconf OPEN_MAX - OUTPUT_VARIABLE GETCONF_MAX_FDS - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_QUIET) - if(NOT GETCONF_MAX_FDS) - set(GETCONF_MAX_FDS=512) - endif() - add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS}) +execute_process(COMMAND getconf OPEN_MAX + OUTPUT_VARIABLE GETCONF_MAX_FDS + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) +if(NOT GETCONF_MAX_FDS) + set(GETCONF_MAX_FDS 512) endif() - -message(STATUS "Use std::bitset for internal fd tracking: ${USE_BITSET_FOR_INTERNAL_FDS}") - -if(USE_BITSET_FOR_INTERNAL_FDS) - message(STATUS "Max open files: ${GETCONF_MAX_FDS}") -endif() - +add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS}) option(ENABLE_LOGGING "Disable all logging messages" ON) if(NOT ENABLE_LOGGING) diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index 32e1fced3..b660fb4c8 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -21,9 +21,7 @@ #include #include -#ifdef USE_BITSET_FOR_INTERNAL_FDS #include -#endif // USE_BITSET_FOR_INTERNAL_FDS /* Forward declarations */ class OpenFileMap; @@ -77,13 +75,8 @@ class PreloadContext { bool interception_enabled_; -#ifdef USE_BITSET_FOR_INTERNAL_FDS std::bitset internal_fds_; mutable std::mutex internal_fds_mutex_; -#else - std::set internal_fds_; -#endif // USE_BITSET_FOR_INTERNAL_FDS - public: static PreloadContext* getInstance() { diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 2c7b21b2d..53ef881da 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -33,10 +33,7 @@ PreloadContext::PreloadContext(): ofm_(std::make_shared()), fs_conf_(std::make_shared()) { -#ifdef USE_BITSET_FOR_INTERNAL_FDS internal_fds_.set(); -#endif // USE_BITSET_FOR_INTERNAL_FDS - } void -- GitLab From 048f5d62824613b6b1ab4673ec9f5b786867dabf Mon Sep 17 00:00:00 2001 From: Alberto 
Miranda Date: Mon, 4 Nov 2019 14:23:02 +0100 Subject: [PATCH 41/71] Ensure non-returning syscalls are logged --- src/client/logging.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/client/logging.cpp b/src/client/logging.cpp index 4e8cf2a0a..0140558d4 100644 --- a/src/client/logging.cpp +++ b/src/client/logging.cpp @@ -255,8 +255,8 @@ logger::log_syscall(syscall::info info, boost::optional result) { - const bool log_syscall_entry = !!(gkfs::log::syscall_at_entry & log_mask_); - const bool log_syscall_result = !!(gkfs::log::syscall & log_mask_); + const bool log_syscall_entry = !!(log::syscall_at_entry & log_mask_); + const bool log_syscall_result = !!(log::syscall & log_mask_); // log the syscall if and only if logging for syscalls is enabled if(!log_syscall_entry && !log_syscall_result) { @@ -266,21 +266,23 @@ logger::log_syscall(syscall::info info, // log the syscall even if we don't have information on it, since it may // be important to the user (we assume that the syscall has completed // though) - if(info == gkfs::syscall::no_info) { + if(info == syscall::no_info) { goto print_syscall; } - // log the syscall entry if the syscall may not return (e.g. execve), - // even if log::syscall_entry is disabled - if(gkfs::syscall::may_not_return(syscall_number)) { + // log the syscall entry if the syscall may not return (e.g. execve) or + // if we are sure that it won't ever return (e.g. 
exit), even if + // log::syscall_at_entry is disabled + if(syscall::may_not_return(syscall_number) || + syscall::never_returns(syscall_number)) { goto print_syscall; } - if(log_syscall_entry && gkfs::syscall::execution_is_pending(info)) { + if(log_syscall_entry && syscall::execution_is_pending(info)) { goto print_syscall; } - if(log_syscall_result && !gkfs::syscall::execution_is_pending(info)) { + if(log_syscall_result && !syscall::execution_is_pending(info)) { goto print_syscall; } -- GitLab From 4f4c5b1ce512d349a1bfc1d788367e6ae9876c76 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 14:23:38 +0100 Subject: [PATCH 42/71] Stop interception later --- src/client/preload.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 5b6535e15..2efb37201 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -216,15 +216,15 @@ void init_preload() { */ void destroy_preload() { - stop_interception(); - CTX->disable_interception(); - LOG(DEBUG, "Syscall interception stopped"); - CTX->clear_hosts(); LOG(DEBUG, "Peer information deleted"); ld_network_service.reset(); LOG(DEBUG, "RPC subsystem shut down"); + stop_interception(); + CTX->disable_interception(); + LOG(DEBUG, "Syscall interception stopped"); + LOG(INFO, "All subsystems shut down. 
Client shutdown complete."); } -- GitLab From 6d49c1c89229427f299633ea5a94a6b1f5581d41 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 15:14:12 +0100 Subject: [PATCH 43/71] Add patches and update deps scripts --- scripts/compile_dep.sh | 18 +-- scripts/dl_dep.sh | 140 ++++++++++++++++++++---- scripts/patches/syscall_intercept.patch | 131 ++++++++++++++++++++++ 3 files changed, 261 insertions(+), 28 deletions(-) create mode 100644 scripts/patches/syscall_intercept.patch diff --git a/scripts/compile_dep.sh b/scripts/compile_dep.sh index 3a73a0e07..6cbb2cc1b 100755 --- a/scripts/compile_dep.sh +++ b/scripts/compile_dep.sh @@ -1,23 +1,23 @@ #!/bin/bash -mogon1_deps=( +MOGON1_DEPS=( "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept date" ) -mogon2_deps=( +MOGON2_DEPS=( "zstd" "lz4" "snappy" "bmi" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept date" ) -fh2_deps=( +FH2_DEPS=( "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" "rocksdb" "syscall_intercept date" ) usage_short() { echo " -usage: compile_dep.sh [-h] [-n ] [-c ] [-d ] [-j ] +usage: compile_dep.sh [-h] [-l] [-n ] [-c ] [-d ] [-j ] source_path install_path " } @@ -59,21 +59,21 @@ list_dependencies() { echo "Available dependencies: " echo -n " Mogon 1: " - for d in "${mogon1_deps[@]}" + for d in "${MOGON1_DEPS[@]}" do echo -n "$d " done echo "" echo -n " Mogon 2: " - for d in "${mogon2_deps[@]}" + for d in "${MOGON2_DEPS[@]}" do echo -n "$d " done echo "" echo -n " fh2: " - for d in "${fh2_deps[@]}" + for d in "${FH2_DEPS[@]}" do echo -n "$d " done @@ -297,7 +297,7 @@ if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then CURR=${SOURCE}/mercury prepare_build_dir ${CURR} cd ${CURR}/build - $CMAKE \ + PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig $CMAKE \ -DCMAKE_BUILD_TYPE:STRING=Debug \ -DBUILD_TESTING:BOOL=ON \ -DMERCURY_USE_SM_ROUTING:BOOL=OFF \ @@ -357,7 +357,7 @@ if [[ ( "${DEPENDENCY}" == "" ) 
|| ( "${DEPENDENCY}" == "syscall_intercept" ) ]] CURR=${SOURCE}/syscall_intercept prepare_build_dir ${CURR} cd ${CURR}/build - CC="" CXX="" $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Debug -DBUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTS:BOOK=OFF .. + $CMAKE -DCMAKE_INSTALL_PREFIX=${INSTALL} -DCMAKE_BUILD_TYPE:STRING=Debug -DBUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTS:BOOK=OFF .. make install fi diff --git a/scripts/dl_dep.sh b/scripts/dl_dep.sh index 036ce615c..215919111 100755 --- a/scripts/dl_dep.sh +++ b/scripts/dl_dep.sh @@ -4,6 +4,27 @@ COMMON_CURL_FLAGS="--silent --fail --show-error --location -O" COMMON_GIT_FLAGS="--quiet --single-branch" +PATCH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PATCH_DIR="${PATCH_DIR}/patches" +CLUSTER="" +DEPENDENCY="" +NA_LAYER="" + +MOGON1_DEPS=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept date" +) + +MOGON2_DEPS=( + "zstd" "lz4" "snappy" "bmi" "mercury" "argobots" "margo" "rocksdb" + "syscall_intercept date" +) + +FH2_DEPS=( + "zstd" "lz4" "snappy" "bmi" "libfabric" "mercury" "argobots" "margo" + "rocksdb" "syscall_intercept date" +) + # Stop all backround jobs on interruption. # "kill -- -$$" sends a SIGTERM to the whole process group, @@ -23,14 +44,41 @@ error_exit() { exit "${2:-1}" ## Return a code specified by $2 or 1 by default. 
} +list_dependencies() { + + echo "Available dependencies: " + + echo -n " Mogon 1: " + for d in "${MOGON1_DEPS[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " Mogon 2: " + for d in "${MOGON2_DEPS[@]}" + do + echo -n "$d " + done + echo "" + + echo -n " fh2: " + for d in "${FH2_DEPS[@]}" + do + echo -n "$d " + done + echo "" +} + clonedeps() { - set -e + set -ex trap exit_child EXIT local FOLDER=$1 local REPO=$2 local COMMIT=$3 local GIT_FLAGS=$4 + local PATCH=$5 local ACTION @@ -44,6 +92,12 @@ clonedeps() { # fix the version cd "${SOURCE}/${FOLDER}" && git checkout -qf ${COMMIT} echo "${ACTION} ${FOLDER} [$COMMIT]" + + # apply patch if provided + if [ ! -z ${PATCH} ]; then + git apply --verbose ${PATCH_DIR}/${PATCH} + fi + } wgetdeps() { @@ -69,7 +123,7 @@ wgetdeps() { usage_short() { echo " -usage: dl_dep.sh [-h] [-n ] [-c ] +usage: dl_dep.sh [-h] [-l] [-n ] [-c ] [-d ] source_path " } @@ -86,16 +140,19 @@ positional arguments: optional arguments: -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for download -n , --na network layer that is used for communication. Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + download a specific dependency. If unspecified + all dependencies are built and installed. 
" } -CLUSTER="" -NA_LAYER="" POSITIONAL=() while [[ $# -gt 0 ]] @@ -113,6 +170,19 @@ case ${key} in shift # past argument shift # past value ;; + -d|--dependency) + if [[ -z "$2" ]]; then + echo "Missing argument for -d/--dependency option" + exit + fi + DEPENDENCY="$2" + shift # past argument + shift # past value + ;; + -l|--list-dependencies) + list_dependencies + exit + ;; -h|--help) help_msg exit @@ -166,40 +236,72 @@ mkdir -p ${SOURCE} # get cluster dependencies if [[ ( "${CLUSTER}" == "mogon1" ) || ( "${CLUSTER}" == "mogon2" ) || ( "${CLUSTER}" == "fh2" ) ]]; then + # get zstd for fast compression in rocksdb - wgetdeps "zstd" "https://github.com/facebook/zstd/archive/v1.3.2.tar.gz" & + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "zstd" ) ]]; then + wgetdeps "zstd" "https://github.com/facebook/zstd/archive/v1.3.2.tar.gz" & + fi + # get zlib for rocksdb - wgetdeps "lz4" "https://github.com/lz4/lz4/archive/v1.8.0.tar.gz" & + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "lz4" ) ]]; then + wgetdeps "lz4" "https://github.com/lz4/lz4/archive/v1.8.0.tar.gz" & + fi + # get snappy for rocksdb - wgetdeps "snappy" "https://github.com/google/snappy/archive/1.1.7.tar.gz" & + if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "snappy" ) ]]; then + wgetdeps "snappy" "https://github.com/google/snappy/archive/1.1.7.tar.gz" & + fi fi #if [ "${CLUSTER}" == "fh2" ]; then # no distinct 3rd party software needed as of now. 
#fi # get BMI -if [ "${NA_LAYER}" == "bmi" ] || [ "${NA_LAYER}" == "all" ]; then - clonedeps "bmi" "https://xgitlab.cels.anl.gov/sds/bmi.git" "81ad0575fc57a69269a16208417cbcbefa51f9ea" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "bmi" ) ]]; then + if [ "${NA_LAYER}" == "bmi" ] || [ "${NA_LAYER}" == "all" ]; then + clonedeps "bmi" "https://xgitlab.cels.anl.gov/sds/bmi.git" "81ad0575fc57a69269a16208417cbcbefa51f9ea" & + fi fi + # get libfabric -if [ "${NA_LAYER}" == "ofi" ] || [ "${NA_LAYER}" == "all" ]; then - # No need to get libfabric for mogon2 as it is already installed - if [[ ("${CLUSTER}" != "mogon2") ]]; then - wgetdeps "libfabric" "https://github.com/ofiwg/libfabric/releases/download/v1.7.2/libfabric-1.7.2.tar.gz" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "ofi" ) ]]; then + if [ "${NA_LAYER}" == "ofi" ] || [ "${NA_LAYER}" == "all" ]; then + # No need to get libfabric for mogon2 as it is already installed + if [[ ("${CLUSTER}" != "mogon2") ]]; then + wgetdeps "libfabric" "https://github.com/ofiwg/libfabric/releases/download/v1.7.2/libfabric-1.7.2.tar.gz" & + fi fi fi + # get Mercury -clonedeps "mercury" "https://github.com/mercury-hpc/mercury" "9906f25b6f9c52079d57006f199b3ea47960c435" "--recurse-submodules" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then + clonedeps "mercury" "https://github.com/mercury-hpc/mercury" "9906f25b6f9c52079d57006f199b3ea47960c435" "--recurse-submodules" & +fi + # get Argobots -wgetdeps "argobots" "https://github.com/pmodels/argobots/archive/v1.0rc1.tar.gz" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "argobots" ) ]]; then + wgetdeps "argobots" "https://github.com/pmodels/argobots/archive/v1.0rc1.tar.gz" & +fi + # get Margo -clonedeps "margo" "https://xgitlab.cels.anl.gov/sds/margo.git" "6ed94e4f3a4d526b0a3b4e57be075461e86d3666" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "margo" ) ]]; then + clonedeps "margo" 
"https://xgitlab.cels.anl.gov/sds/margo.git" "6ed94e4f3a4d526b0a3b4e57be075461e86d3666" & +fi + # get rocksdb -wgetdeps "rocksdb" "https://github.com/facebook/rocksdb/archive/v6.1.2.tar.gz" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "rocksdb" ) ]]; then + wgetdeps "rocksdb" "https://github.com/facebook/rocksdb/archive/v6.1.2.tar.gz" & +fi + # get syscall_intercept -clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" & +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "syscall_intercept" ) ]]; then + clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" "" "syscall_intercept2.patch" & +fi + # get date -clonedeps "date" "https://github.com/HowardHinnant/date.git" "e7e1482087f58913b80a20b04d5c58d9d6d90155" +if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "date" ) ]]; then + clonedeps "date" "https://github.com/HowardHinnant/date.git" "e7e1482087f58913b80a20b04d5c58d9d6d90155" & +fi # Wait for all download to be completed wait diff --git a/scripts/patches/syscall_intercept.patch b/scripts/patches/syscall_intercept.patch new file mode 100644 index 000000000..00c7e2674 --- /dev/null +++ b/scripts/patches/syscall_intercept.patch @@ -0,0 +1,131 @@ +diff --git a/include/libsyscall_intercept_hook_point.h b/include/libsyscall_intercept_hook_point.h +index 2fe7d57..43a8974 100644 +--- a/include/libsyscall_intercept_hook_point.h ++++ b/include/libsyscall_intercept_hook_point.h +@@ -56,9 +56,18 @@ extern int (*intercept_hook_point)(long syscall_number, + long arg2, long arg3, + long arg4, long arg5, + long *result); +- +-extern void (*intercept_hook_point_clone_child)(void); +-extern void (*intercept_hook_point_clone_parent)(long pid); ++extern void (*intercept_hook_point_clone_child)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, long newtls); ++extern void 
(*intercept_hook_point_clone_parent)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, long newtls, ++ long returned_pid); ++extern void (*intercept_hook_point_post_kernel)(long syscall_number, ++ long arg0, long arg1, ++ long arg2, long arg3, ++ long arg4, long arg5, ++ long result); + + /* + * syscall_no_intercept - syscall without interception +diff --git a/src/intercept.c b/src/intercept.c +index 41fd95d..c0cd865 100644 +--- a/src/intercept.c ++++ b/src/intercept.c +@@ -67,9 +67,23 @@ int (*intercept_hook_point)(long syscall_number, + long *result) + __attribute__((visibility("default"))); + +-void (*intercept_hook_point_clone_child)(void) ++void (*intercept_hook_point_clone_child)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, ++ long newtls) + __attribute__((visibility("default"))); +-void (*intercept_hook_point_clone_parent)(long) ++ ++void (*intercept_hook_point_clone_parent)( ++ unsigned long flags, void *child_stack, ++ int *ptid, int *ctid, ++ long newtls, long returned_pid) ++ __attribute__((visibility("default"))); ++ ++void (*intercept_hook_point_post_kernel)(long syscall_number, ++ long arg0, long arg1, ++ long arg2, long arg3, ++ long arg4, long arg5, ++ long result) + __attribute__((visibility("default"))); + + bool debug_dumps_on; +@@ -655,6 +669,21 @@ intercept_routine(struct context *context) + desc.args[3], + desc.args[4], + desc.args[5]); ++ ++ /* ++ * some users might want to execute code after a syscall has ++ * been forwarded to the kernel (for example, to check its ++ * return value). 
++ */ ++ if (intercept_hook_point_post_kernel != NULL) ++ intercept_hook_point_post_kernel(desc.nr, ++ desc.args[0], ++ desc.args[1], ++ desc.args[2], ++ desc.args[3], ++ desc.args[4], ++ desc.args[5], ++ result); + } + + intercept_log_syscall(patch, &desc, KNOWN, result); +@@ -670,12 +699,26 @@ intercept_routine(struct context *context) + struct wrapper_ret + intercept_routine_post_clone(struct context *context) + { ++ struct syscall_desc desc; ++ get_syscall_in_context(context, &desc); ++ + if (context->rax == 0) { + if (intercept_hook_point_clone_child != NULL) +- intercept_hook_point_clone_child(); ++ intercept_hook_point_clone_child( ++ (unsigned long)desc.args[0], ++ (void *)desc.args[1], ++ (int *)desc.args[2], ++ (int *)desc.args[3], ++ desc.args[4]); + } else { + if (intercept_hook_point_clone_parent != NULL) +- intercept_hook_point_clone_parent(context->rax); ++ intercept_hook_point_clone_parent( ++ (unsigned long)desc.args[0], ++ (void *)desc.args[1], ++ (int *)desc.args[2], ++ (int *)desc.args[3], ++ desc.args[4], ++ context->rax); + } + + return (struct wrapper_ret){.rax = context->rax, .rdx = 1 }; +diff --git a/test/test_clone_thread_preload.c b/test/test_clone_thread_preload.c +index c7663a2..bff239e 100644 +--- a/test/test_clone_thread_preload.c ++++ b/test/test_clone_thread_preload.c +@@ -96,8 +96,18 @@ hook(long syscall_number, + * of the clone syscall. 
+ */ + static void +-hook_child(void) ++hook_child(unsigned long clone_flags, ++ void *child_stack, ++ int *ptid, ++ int *ctid, ++ long newtls) + { ++ (void) clone_flags; ++ (void) child_stack; ++ (void) ptid; ++ (void) ctid; ++ (void) newtls; ++ + static const char msg[] = "clone_hook_child called\n"; + + assert(flags != -1); -- GitLab From 284d7e5c7aaef3ab93983fb82fa500594c6b1ca8 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 15:26:10 +0100 Subject: [PATCH 44/71] Force timezone initialization only if needed --- src/client/logging.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/client/logging.cpp b/src/client/logging.cpp index 0140558d4..bc6411d88 100644 --- a/src/client/logging.cpp +++ b/src/client/logging.cpp @@ -224,6 +224,7 @@ logger::logger(const std::string& opts, log_fd_ = fd; } +#if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) // Finding the current timezone implies accessing OS files (i.e. syscalls), // but current_zone() doesn't actually retrieve the time zone but rather // provides a descriptor to it that is **atomically initialized** upon its @@ -242,6 +243,7 @@ logger::logger(const std::string& opts, // be removed if the date API ends up providing this functionality. using namespace date; timezone_->get_info(date::sys_days{January/1/1970}); +#endif } logger::~logger() { -- GitLab From dfff56284d79992e0ecc7191f981c0fda9a6c4a0 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 15:48:29 +0100 Subject: [PATCH 45/71] Update documentation --- README.md | 82 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index c5cb98b95..b8c140f5a 100644 --- a/README.md +++ b/README.md @@ -42,51 +42,62 @@ This is a file system. 
(execute the script for help): ```bash -usage: dl_dep.sh [-h] [-n ] [-c ] +usage: dl_dep.sh [-h] [-l] [-n ] [-c ] [-d ] source_path - + This script gets all GekkoFS dependency sources (excluding the fs itself) - + positional arguments: source_path path where the dependency downloads are put - - + + optional arguments: -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for download -n , --na network layer that is used for communication. Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters - supported clusters: {mogon1,fh2} + supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + download a specific dependency. If unspecified + all dependencies are built and installed. ``` - Now use the install script to compile them and install them to the desired directory. You can choose the according na_plugin (execute the script for help): ```bash -usage: compile_dep.sh [-h] [-n ] [-c ] [-j ] +usage: compile_dep.sh [-h] [-l] [-n ] [-c ] [-d ] [-j ] source_path install_path - + This script compiles all GekkoFS dependencies (excluding the fs itself) - + positional arguments: source_path path to the cloned dependencies path from clone_dep.sh install_path path to the install path of the compiled dependencies - - + + optional arguments: - -h, --help shows this help message and exits + -h, --help shows this help message and exits + -l, --list-dependencies + list dependencies available for building and installation -n , --na network layer that is used for communication. Valid: {bmi,ofi,all} defaults to 'all' -c , --cluster additional configurations for specific compute clusters supported clusters: {mogon1,mogon2,fh2} + -d , --dependency + build and install a specific dependency. If unspecified + all dependencies are built and installed. 
-j , --compilecores - number of cores that are used to compile the depdencies + number of cores that are used to compile the dependencies defaults to number of available cores + -t, --test Perform libraries tests. ``` ## Compile GekkoFS @@ -127,15 +138,48 @@ Run the application with the preload library: `LD_PRELOAD=/build/lib/libio an MPI application use the `{mpirun, mpiexec} -x` argument. ### Logging -To enable logging the following environment variables are used: -GKFS_PRELOAD_LOG_PATH="" to set the path to the logging file of the client library. -GKFS_DAEMON_LOG_PATH="" to set the path to the logging file of the daemon. -GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace} to set the trace level verbosity. -Numbers from 0-6 may also be used where as 0 is off and 6 represents trace. +The following environment variables can be used to enable logging in the client +library: `LIBGKFS_LOG=` and `LIBGKFS_LOG_OUTPUT=` to +configure the output module and set the path to the log file of the client +library. + +The following modules are available: + + - `none`: don't print any messages + - `syscalls`: Trace system calls: print the name of each system call, its + arguments, and its return value. All system calls are printed after being + executed save for those that may not return, such as `execve()`, + `execveat()`, `exit()`, and `exit_group()`. This module will only be + available if the client library is built in `Debug` mode. + - `syscalls_at_entry`: Trace system calls: print the name of each system call + and its arguments. All system calls are printed before being executed and + therefore their return values are not available in the log. This module will + only be available if the client library is built in `Debug` mode. + - `info`: Print information messages. + - `critical`: Print critical errors. + - `errors`: Print errors. + - `warnings`: Print warnings. + - `mercury`: Print Mercury messages. + - `debug`: Print debug messages.
This module will only be available if the + client library is built in `Debug` mode. + - `most`: All previous options combined except `syscalls_at_entry`. This + module will only be available if the client library is built in `Debug` + mode. + - `all`: All previous options combined. + - `help`: Print a help message and exit. + +Additionally, setting the `LIBGKFS_LOG_OUTPUT_TRUNC` environment variable with +a value different from `0` will instruct the logging subsystem to truncate +the file used for logging, rather than append to it. + +For the daemon, the `GKFS_DAEMON_LOG_PATH=` environment variable +can be provided to set the path to the log file, and the log module can be +selected with the `GKFS_LOG_LEVEL={off,critical,err,warn,info,debug,trace}` +environment variable. ### Acknowledgment This software was partially supported by the EC H2020 funded project NEXTGenIO (Project ID: 671951, www.nextgenio.eu). -This software was partially supported by the ADA-FS project under the SPPEXA project funded by the DFG. \ No newline at end of file +This software was partially supported by the ADA-FS project under the SPPEXA project funded by the DFG. -- GitLab From 6855f57c35df69b3c8eb1a9407d4387b5780555a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 15:48:59 +0100 Subject: [PATCH 46/71] Fix typo --- scripts/compile_dep.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/compile_dep.sh b/scripts/compile_dep.sh index 6cbb2cc1b..a3d4edd9b 100755 --- a/scripts/compile_dep.sh +++ b/scripts/compile_dep.sh @@ -47,7 +47,7 @@ optional arguments: build and install a specific dependency. If unspecified all dependencies are built and installed. -j , --compilecores - number of cores that are used to compile the depdencies + number of cores that are used to compile the dependencies defaults to number of available cores -t, --test Perform libraries tests. 
" -- GitLab From 8359736713d773c39c9f3e244781a61ef00104dd Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 15:53:48 +0100 Subject: [PATCH 47/71] Add missing information to README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b8c140f5a..a960d97d9 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,8 @@ an MPI application use the `{mpirun, mpiexec} -x` argument. The following environment variables can be used to enable logging in the client library: `LIBGKFS_LOG=` and `LIBGKFS_LOG_OUTPUT=` to configure the output module and set the path to the log file of the client -library. +library. If no path is specified in `LIBGKFS_LOG_OUTPUT`, the client library +will send log messages to `/tmp/gkfs_client.log`. The following modules are available: -- GitLab From aaded91786878eee8654ff59f3b9bde465d5c957 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 15:54:34 +0100 Subject: [PATCH 48/71] Update CI environment variables to new names --- .gitlab-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4fbcc4943..2972085e0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -16,7 +16,8 @@ variables: # Configuration variables GKFS_LOG_LEVEL: "100" GKFS_DAEMON_LOG_PATH: "${CI_PROJECT_DIR}/logs/daemon.log" - GKFS_PRELOAD_LOG_PATH: "${CI_PROJECT_DIR}/logs/preload.log" + LIBGKFS_LOG: "all" + LIBGKFS_LOG_OUTPUT: "${CI_PROJECT_DIR}/logs/gkfs_client.log" GIT_SUBMODULE_STRATEGY: recursive image: gekkofs/gekkofs:build_env -- GitLab From 10b5f44f84075c088f6287bc11a0cf0eabfa55d6 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 17:27:30 +0100 Subject: [PATCH 49/71] Add missing file to build system --- CMake/FindDate.cmake | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 CMake/FindDate.cmake diff --git a/CMake/FindDate.cmake b/CMake/FindDate.cmake new file mode
100644 index 000000000..c60ce7906 --- /dev/null +++ b/CMake/FindDate.cmake @@ -0,0 +1,40 @@ +find_path(DATE_INCLUDE_DIR + NAMES date/date.h +) + +find_path(TZ_INCLUDE_DIR + NAMES date/tz.h +) + +find_library(TZ_LIBRARY + NAMES tz +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( Date + DEFAULT_MSG + DATE_INCLUDE_DIR + TZ_INCLUDE_DIR + TZ_LIBRARY +) + +if(Date_FOUND) + set(DATE_INCLUDE_DIRS ${DATE_INCLUDE_DIR}) + set(TZ_INCLUDE_DIRS ${TZ_INCLUDE_DIR}) + set(TZ_LIBRARIES ${TZ_LIBRARY}) + + if(NOT TARGET Date::TZ) + add_library(Date::TZ UNKNOWN IMPORTED) + set_target_properties(Date::TZ PROPERTIES + IMPORTED_LOCATION "${TZ_LIBRARY}" + INTERFACE_COMPILE_DEFINITIONS "USE_OS_TZDB=1" + INTERFACE_INCLUDE_DIRECTORIES "${TZ_INCLUDE_DIR}" + ) + endif() +endif() + +mark_as_advanced( + DATE_INCLUDE_DIR + TZ_INCLUDE_DIR + TZ_LIBRARY +) -- GitLab From 72d91d818b2c962cf81de11fce27e634bc022542 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 17:28:46 +0100 Subject: [PATCH 50/71] Fix typo in patch name --- scripts/dl_dep.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dl_dep.sh b/scripts/dl_dep.sh index 215919111..bcfbe0b36 100755 --- a/scripts/dl_dep.sh +++ b/scripts/dl_dep.sh @@ -295,7 +295,7 @@ fi # get syscall_intercept if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "syscall_intercept" ) ]]; then - clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" "" "syscall_intercept2.patch" & + clonedeps "syscall_intercept" "https://github.com/pmem/syscall_intercept.git" "cc3412a2ad39f2e26cc307d5b155232811d7408e" "" "syscall_intercept.patch" & fi # get date -- GitLab From abaa50e2ec17511b1da4c6f9a11e63b774f19064 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 17:38:09 +0100 Subject: [PATCH 51/71] Add conditional descriptors for newer system calls --- src/client/syscalls/detail/syscall_info.c | 30 
+++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/client/syscalls/detail/syscall_info.c b/src/client/syscalls/detail/syscall_info.c index a56122cd7..d5fb0e008 100644 --- a/src/client/syscalls/detail/syscall_info.c +++ b/src/client/syscalls/detail/syscall_info.c @@ -374,20 +374,50 @@ const struct syscall_info syscall_table[] = { SYSCALL(getrandom, 3, S_RET(rdec), S_NARG(ptr, "buf"), S_NARG(arg, "count"), S_NARG(arg, "flags")), SYSCALL(memfd_create, 2, S_RET(rdec), S_NARG(cstr, "pathname"), S_NARG(arg, "flags")), SYSCALL(kexec_file_load, 5, S_RET(rdec), S_NARG(fd, "kernel_fd"), S_NARG(fd, "initrd_fd"), S_NARG(arg, "cmdline_len"), S_NARG(cstr, "cmdline"), S_NARG(arg, "flags")), + +#ifdef SYS_bpf SYSCALL(bpf, 2, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(ptr, "attr"), S_NARG(arg, "size")), +#endif // SYS_bpf + +#ifdef SYS_execveat SYSCALL(execveat, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(ptr, "argv"), S_NARG(ptr, "envp"), S_NARG(arg, "flags")), +#endif // SYS_execveat + SYSCALL(userfaultfd, 2, S_RET(rdec), S_NARG(arg, "flags")), + +#ifdef SYS_membarrier SYSCALL(membarrier, 2, S_RET(rdec), S_NARG(arg, "cmd"), S_NARG(arg, "flags")), +#endif // SYS_membarrier + +#ifdef SYS_mlock2 SYSCALL(mlock2, 3, S_RET(rdec), S_NARG(ptr, "addr"), S_NARG(dec, "length"), S_NARG(arg, "flags")), +#endif // SYS_mlock2 + SYSCALL(copy_file_range, 6, S_RET(rdec), S_NARG(fd, "fd_in"), S_NARG(ptr, "off_in"), S_NARG(fd, "fd_out"), S_NARG(ptr, "off_out"), S_NARG(dec, "length"), S_NARG(arg, "flags")), + +#ifdef SYS_preadv2 SYSCALL(preadv2, 6, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "vlen"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h"), S_NARG(arg, "flags")), +#endif // SYS_preadv2 + +#ifdef SYS_pwritev2 SYSCALL(pwritev2, 6, S_RET(rdec), S_UARG(fd), S_NARG(ptr, "iov"), S_NARG(arg, "vlen"), S_NARG(arg, "pos_l"), S_NARG(arg, "pos_h"), S_NARG(arg, "flags")), +#endif // SYS_pwritev2 + SYSCALL(pkey_mprotect, 4, S_RET(rdec), S_NARG(ptr, 
"addr"), S_NARG(dec, "length"), S_NARG(mmap_prot, "prot"), S_NARG(dec, "pkey")), SYSCALL(pkey_alloc, 2, S_RET(rdec), S_NARG(arg, "flags"), S_NARG(arg, "init_val")), SYSCALL(pkey_free, 1, S_RET(rdec), S_NARG(dec, "pkey")), + +#ifdef SYS_statx SYSCALL(statx, 5, S_RET(rdec), S_NARG(atfd, "dfd"), S_NARG(cstr, "pathname"), S_NARG(arg, "flags"), S_NARG(arg, "mask"), S_NARG(ptr, "buffer")), +#endif // SYS_statx + +#ifdef SYS_io_pgetevents SYSCALL(io_pgetevents, 6, S_RET(rdec), S_NARG(ptr, "ctx_id"), S_NARG(dec, "min_nr"), S_NARG(dec, "nr"), S_NARG(ptr, "events"), S_NARG(ptr, "timeout"), S_NARG(ptr, "sig")), +#endif // SYS_io_pgetevents + +#ifdef SYS_rseq SYSCALL(rseq, 4, S_RET(rdec), S_NARG(ptr, "rseq"), S_NARG(dec, "rseq_len"), S_NARG(arg, "flags"), S_NARG(signum, "sig")) +#endif // SYS_rseq }; -- GitLab From 1d604bcc0b9a50c81b21b9dc10e9196a39a92c2a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 21:52:43 +0100 Subject: [PATCH 52/71] Force abort if log message larger than log buffer --- CMakeLists.txt | 10 +++++++--- include/client/logging.hpp | 37 ++++++++++++++++++++++++++++--------- src/client/logging.cpp | 2 +- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 87409bd21..30d774701 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,11 +131,15 @@ if(NOT GETCONF_MAX_FDS) endif() add_definitions(-DMAX_OPEN_FDS=${GETCONF_MAX_FDS}) -option(ENABLE_LOGGING "Disable all logging messages" ON) -if(NOT ENABLE_LOGGING) +option(ENABLE_CLIENT_LOG "Enable logging messages" ON) +if(ENABLE_CLIENT_LOG) + set(CLIENT_LOG_MESSAGE_SIZE 1024 CACHE STRING "Maximum size of a log message in the client library") + add_definitions(-DLIBGKFS_LOG_MESSAGE_SIZE=${CLIENT_LOG_MESSAGE_SIZE}) + message(STATUS "Maximum log message size in the client library: ${CLIENT_LOG_MESSAGE_SIZE}") +else() add_definitions(-DGKFS_DISABLE_LOGGING) endif() -message(STATUS "Logging output: ${ENABLE_LOGGING}") +message(STATUS "Client logging 
output: ${ENABLE_CLIENT_LOG}") configure_file(include/global/configure.hpp.in include/global/configure.hpp) diff --git a/include/client/logging.hpp b/include/client/logging.hpp index 04fbdd032..6c762337e 100644 --- a/include/client/logging.hpp +++ b/include/client/logging.hpp @@ -134,10 +134,6 @@ struct logger; namespace detail { -enum { inline_buffer_size = 0x1000 }; - -using safe_buffer = fmt::basic_memory_buffer; - template static inline void log_buffer(std::FILE* fp, @@ -253,9 +249,16 @@ format_syscall_info_to(Buffer&& buffer, fmt::format_to(buffer, fmt::string_view(tmp.data(), tmp.size())); } +} // namespace detail +enum { max_buffer_size = LIBGKFS_LOG_MESSAGE_SIZE }; + +struct static_buffer : public fmt::basic_memory_buffer { + +protected: + void grow(std::size_t size) override final; +}; -} // namespace detail struct logger { @@ -276,7 +279,7 @@ struct logger { return; } - detail::safe_buffer buffer; + static_buffer buffer; detail::format_timestamp_to(buffer, timezone_); fmt::format_to(buffer, "[{}] [{}] ", ::syscall_no_intercept(SYS_gettid), @@ -324,14 +327,14 @@ struct logger { }; - char buffer[detail::inline_buffer_size]; - detail::safe_buffer prefix; + static_buffer prefix; detail::format_timestamp_to(prefix); fmt::format_to(prefix, "[{}] [{}] ", ::syscall_no_intercept(SYS_gettid), lookup_level_name(level)); + char buffer[max_buffer_size]; const int n = vsnprintf(buffer, sizeof(buffer), fmt, ap); std::array buffers{}; @@ -375,7 +378,7 @@ struct logger { throw std::runtime_error("Invalid file descriptor"); } - detail::safe_buffer buffer; + static_buffer buffer; fmt::format_to(buffer, std::forward(args)...); fmt::format_to(buffer, "\n"); detail::log_buffer(fd, buffer); @@ -425,6 +428,22 @@ destroy_global_logger() { logger::global_logger().reset(); } +inline void +static_buffer::grow(std::size_t size) { + + const auto logger = get_global_logger(); + + if(logger) { + logger->log_mask_ &= ~(syscall | syscall_at_entry); + } + + std::fprintf(stderr,
+"FATAL: message too long for gkfs::log::static_buffer, increase the size of\n" +"LIBGKFS_LOG_MESSAGE_SIZE in CMake or reduce the length of the offending " +"message.\n"); + abort(); +} + } // namespace log } // namespace gkfs diff --git a/src/client/logging.cpp b/src/client/logging.cpp index bc6411d88..d0f73ea9a 100644 --- a/src/client/logging.cpp +++ b/src/client/logging.cpp @@ -292,7 +292,7 @@ logger::log_syscall(syscall::info info, print_syscall: - detail::safe_buffer buffer; + static_buffer buffer; detail::format_timestamp_to(buffer, timezone_); detail::format_syscall_info_to(buffer, info); -- GitLab From 7976f782c690da0145ff87e9e08d4907ef9bb0f3 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 4 Nov 2019 21:54:21 +0100 Subject: [PATCH 53/71] Format MAP_SHARED_VALIDATE only if available --- include/client/syscalls/args.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/client/syscalls/args.hpp b/include/client/syscalls/args.hpp index f089ef43f..c00778610 100644 --- a/include/client/syscalls/args.hpp +++ b/include/client/syscalls/args.hpp @@ -346,7 +346,9 @@ format_mmap_flags_arg_to(FmtBuffer& buffer, utils::make_array( FLAG_ENTRY(MAP_SHARED), FLAG_ENTRY(MAP_PRIVATE), +#ifdef MAP_SHARED_VALIDATE FLAG_ENTRY(MAP_SHARED_VALIDATE), +#endif FLAG_ENTRY(MAP_FIXED), FLAG_ENTRY(MAP_ANONYMOUS), FLAG_ENTRY(MAP_GROWSDOWN), -- GitLab From 0086eaa6f048f8558b66378bc30951919673143c Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 5 Nov 2019 10:51:28 +0100 Subject: [PATCH 54/71] Partial fix for post clone inconsistency --- src/client/intercept.cpp | 8 ++++++++ src/client/preload_context.cpp | 9 ++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 3a620cace..99a90207b 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -687,10 +687,14 @@ hook_clone_at_child(unsigned long flags, 0}; #endif + reentrance_guard_flag = true; + LOG(SYSCALL, 
::get_current_syscall_info() | gkfs::syscall::executed, SYS_clone, args, 0); + + reentrance_guard_flag = false; } static void @@ -711,10 +715,14 @@ hook_clone_at_parent(unsigned long flags, 0}; #endif + reentrance_guard_flag = true; + LOG(SYSCALL, ::get_current_syscall_info() | gkfs::syscall::executed, SYS_clone, args, returned_pid); + + reentrance_guard_flag = false; } diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 53ef881da..2fe1109f1 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -192,13 +192,14 @@ bool PreloadContext::interception_enabled() const { int PreloadContext::register_internal_fd(int fd) { + LOG(DEBUG, "registering fd {} as internal", fd); + assert(fd >= 0); std::lock_guard lock(internal_fds_mutex_); const int pos = internal_fds_._Find_first(); internal_fds_.reset(pos); - LOG(DEBUG, "registering internal fd: {} -> {}", fd, pos + INTERNAL_FD_BASE); #if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) long args[gkfs::syscall::MAX_ARGS]{fd, pos + INTERNAL_FD_BASE, O_CLOEXEC}; @@ -243,19 +244,21 @@ int PreloadContext::register_internal_fd(int fd) { gkfs::syscall::executed, SYS_close, args2, rv); + LOG(DEBUG, " (fd {} reassigned to ifd {})", fd, ifd); + return ifd; } void PreloadContext::unregister_internal_fd(int fd) { + LOG(DEBUG, "unregistering internal fd {}", fd); + assert(fd >= INTERNAL_FD_BASE); const auto pos = fd - INTERNAL_FD_BASE; std::lock_guard lock(internal_fds_mutex_); internal_fds_.set(pos); - - LOG(DEBUG, "unregistering internal fd: {}", fd); } bool PreloadContext::is_internal_fd(int fd) const { -- GitLab From b0dd25c77788507ba85fde393a77a75ef3c0df91 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 5 Nov 2019 11:53:02 +0100 Subject: [PATCH 55/71] Add conditional descriptors for newer syscalls/flags --- include/client/syscalls/args.hpp | 10 ++++++++-- src/client/intercept.cpp | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git 
a/include/client/syscalls/args.hpp b/include/client/syscalls/args.hpp index c00778610..0677ea54f 100644 --- a/include/client/syscalls/args.hpp +++ b/include/client/syscalls/args.hpp @@ -359,8 +359,12 @@ format_mmap_flags_arg_to(FmtBuffer& buffer, FLAG_ENTRY(MAP_POPULATE), FLAG_ENTRY(MAP_NONBLOCK), FLAG_ENTRY(MAP_STACK), - FLAG_ENTRY(MAP_HUGETLB), - FLAG_ENTRY(MAP_SYNC)); + FLAG_ENTRY(MAP_HUGETLB) +#ifdef MAP_SYNC + , + FLAG_ENTRY(MAP_SYNC) +#endif + ); fmt::format_to(buffer, "{}=", parg.name); format_flag_set(buffer, parg.value, flag_names); @@ -397,7 +401,9 @@ format_clone_flags_arg_to(FmtBuffer& buffer, FLAG_ENTRY(CLONE_DETACHED), FLAG_ENTRY(CLONE_UNTRACED), FLAG_ENTRY(CLONE_CHILD_SETTID), +#ifdef CLONE_NEWCGROUP FLAG_ENTRY(CLONE_NEWCGROUP), +#endif FLAG_ENTRY(CLONE_NEWUTS), FLAG_ENTRY(CLONE_NEWIPC), FLAG_ENTRY(CLONE_NEWUSER), diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 99a90207b..d00896c90 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -352,6 +352,7 @@ int hook(long syscall_number, reinterpret_cast(arg2)); break; +#ifdef SYS_execveat case SYS_execveat: *result = syscall_no_intercept(syscall_number, arg0, @@ -360,6 +361,7 @@ int hook(long syscall_number, reinterpret_cast(arg3), arg4); break; +#endif case SYS_open: *result = hook_openat(AT_FDCWD, -- GitLab From 5474d22b6f987da14a578e9292fd1e377fc2cf1d Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 5 Nov 2019 11:56:06 +0100 Subject: [PATCH 56/71] Add missing ifdef for SYS_execveat --- include/client/syscalls/syscall.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/client/syscalls/syscall.hpp b/include/client/syscalls/syscall.hpp index 82b7a2e74..7865dd608 100644 --- a/include/client/syscalls/syscall.hpp +++ b/include/client/syscalls/syscall.hpp @@ -87,8 +87,11 @@ always_returns(const long syscall_number) { static inline bool may_not_return(const long syscall_number) { - return syscall_number == SYS_execve || - 
syscall_number == SYS_execveat; + return syscall_number == SYS_execve +#ifdef SYS_execveat + || syscall_number == SYS_execveat +#endif + ; } -- GitLab From ac016ef21e990e9959bd0d53a2f2a5ccc550d594 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 7 Nov 2019 17:45:29 +0100 Subject: [PATCH 57/71] Fix issue with fds created by kernel modules --- include/client/logging.hpp | 1 - include/client/preload_context.hpp | 9 +++- src/client/logging.cpp | 14 ++--- src/client/preload.cpp | 14 ++++- src/client/preload_context.cpp | 84 ++++++++++++++++++++++++++---- 5 files changed, 98 insertions(+), 24 deletions(-) diff --git a/include/client/logging.hpp b/include/client/logging.hpp index 6c762337e..9f682eec7 100644 --- a/include/client/logging.hpp +++ b/include/client/logging.hpp @@ -400,7 +400,6 @@ struct logger { const date::time_zone * const timezone_; }; - // the following static functions can be used to interact // with a globally registered logger instance diff --git a/include/client/preload_context.hpp b/include/client/preload_context.hpp index b660fb4c8..3a4d288a3 100644 --- a/include/client/preload_context.hpp +++ b/include/client/preload_context.hpp @@ -56,8 +56,8 @@ enum class RelativizeStatus { class PreloadContext { - static const auto constexpr INTERNAL_FD_BASE = - MAX_OPEN_FDS - MAX_INTERNAL_FDS; + static auto constexpr MIN_INTERNAL_FD = MAX_OPEN_FDS - MAX_INTERNAL_FDS; + static auto constexpr MAX_USER_FDS = MIN_INTERNAL_FD; private: PreloadContext(); @@ -77,6 +77,8 @@ class PreloadContext { std::bitset internal_fds_; mutable std::mutex internal_fds_mutex_; + bool internal_fds_must_relocate_; + std::bitset protected_fds_; public: static PreloadContext* getInstance() { @@ -122,6 +124,9 @@ class PreloadContext { int register_internal_fd(int fd); void unregister_internal_fd(int fd); bool is_internal_fd(int fd) const; + + void protect_user_fds(); + void unprotect_user_fds(); }; diff --git a/src/client/logging.cpp b/src/client/logging.cpp index 
d0f73ea9a..ab9ca1111 100644 --- a/src/client/logging.cpp +++ b/src/client/logging.cpp @@ -192,15 +192,8 @@ logger::logger(const std::string& opts, bool trunc) : timezone_(date::current_zone()) { - /* use stderr (dup()ed to an internal fd) by default */ - log_fd_ = ::dup(2); - - if(log_fd_ == -1) { - log(gkfs::log::error, __func__, __LINE__, "Failed to dup stderr. " - "Logging will fall back to normal stderr", path); - log_fd_ = 2; - } - + /* use stderr by default */ + log_fd_ = 2; log_mask_ = process_log_options(opts); if(!path.empty()) { @@ -212,7 +205,8 @@ logger::logger(const std::string& opts, // we use ::open() here rather than ::syscall_no_intercept(SYS_open) // because we want the call to be intercepted by our hooks, which - // allows us to categorize the resulting fd as 'internal' + // allows us to categorize the resulting fd as 'internal' and + // relocate it to our private range int fd = ::open(path.c_str(), flags, 0600); if(fd == -1) { diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 2efb37201..547c97ab4 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -198,16 +198,28 @@ void init_preload() { start_self_interception(); CTX->init_logging(); - // from here ownwards it is safe to print messages LOG(DEBUG, "Logging subsystem initialized"); + // Kernel modules such as ib_uverbs may create fds in kernel space and pass + // them to user-space processes using ioctl()-like interfaces. if this + // happens during our internal initialization, there's no way for us to + // control this creation and the fd will be created in the + // [0, MAX_USER_FDS) range rather than in our private + // [MAX_USER_FDS, MAX_OPEN_FDS) range. To prevent this for our internal + // initialization code, we forcefully occupy the user fd range to force + // such modules to create fds in our private range. 
+ CTX->protect_user_fds(); + log_prog_name(); init_cwd(); LOG(DEBUG, "Current working directory: '{}'", CTX->cwd()); init_ld_env_if_needed(); CTX->enable_interception(); + + CTX->unprotect_user_fds(); + start_interception(); } diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index 2fe1109f1..b63c0f3cf 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -28,12 +28,17 @@ #include #include +decltype(PreloadContext::MIN_INTERNAL_FD) constexpr +PreloadContext::MIN_INTERNAL_FD; +decltype(PreloadContext::MAX_USER_FDS) constexpr +PreloadContext::MAX_USER_FDS; PreloadContext::PreloadContext(): ofm_(std::make_shared()), fs_conf_(std::make_shared()) { internal_fds_.set(); + internal_fds_must_relocate_ = true; } void @@ -192,17 +197,30 @@ bool PreloadContext::interception_enabled() const { int PreloadContext::register_internal_fd(int fd) { - LOG(DEBUG, "registering fd {} as internal", fd); - assert(fd >= 0); + if(!internal_fds_must_relocate_) { + LOG(DEBUG, "registering fd {} as internal (no relocation needed)", fd); + assert(fd >= MIN_INTERNAL_FD); + internal_fds_.reset(fd - MIN_INTERNAL_FD); + return fd; + } + + LOG(DEBUG, "registering fd {} as internal (needs relocation)", fd); + std::lock_guard lock(internal_fds_mutex_); const int pos = internal_fds_._Find_first(); + + if(static_cast(pos) == internal_fds_.size()) { + throw std::runtime_error( +"Internal GekkoFS file descriptors exhausted, increase MAX_INTERNAL_FDS in " +"CMake, rebuild GekkoFS and try again."); + } internal_fds_.reset(pos); #if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) - long args[gkfs::syscall::MAX_ARGS]{fd, pos + INTERNAL_FD_BASE, O_CLOEXEC}; + long args[gkfs::syscall::MAX_ARGS]{fd, pos + MIN_INTERNAL_FD, O_CLOEXEC}; #endif LOG(SYSCALL, @@ -212,7 +230,7 @@ int PreloadContext::register_internal_fd(int fd) { SYS_dup3, args); const int ifd = - ::syscall_no_intercept(SYS_dup3, fd, pos + INTERNAL_FD_BASE, O_CLOEXEC); + 
::syscall_no_intercept(SYS_dup3, fd, pos + MIN_INTERNAL_FD, O_CLOEXEC); LOG(SYSCALL, gkfs::syscall::from_internal_code | @@ -220,7 +238,7 @@ int PreloadContext::register_internal_fd(int fd) { gkfs::syscall::executed, SYS_dup3, args, ifd); - assert(::syscall_error_code(ifd) != -1); + assert(::syscall_error_code(ifd) == 0); #if !defined(GKFS_DISABLE_LOGGING) && defined(GKFS_DEBUG_BUILD) long args2[gkfs::syscall::MAX_ARGS]{fd}; @@ -244,7 +262,7 @@ int PreloadContext::register_internal_fd(int fd) { gkfs::syscall::executed, SYS_close, args2, rv); - LOG(DEBUG, " (fd {} reassigned to ifd {})", fd, ifd); + LOG(DEBUG, " (fd {} relocated to ifd {})", fd, ifd); return ifd; } @@ -253,9 +271,9 @@ void PreloadContext::unregister_internal_fd(int fd) { LOG(DEBUG, "unregistering internal fd {}", fd); - assert(fd >= INTERNAL_FD_BASE); + assert(fd >= MIN_INTERNAL_FD); - const auto pos = fd - INTERNAL_FD_BASE; + const auto pos = fd - MIN_INTERNAL_FD; std::lock_guard lock(internal_fds_mutex_); internal_fds_.set(pos); @@ -263,14 +281,60 @@ void PreloadContext::unregister_internal_fd(int fd) { bool PreloadContext::is_internal_fd(int fd) const { - if(fd < INTERNAL_FD_BASE) { + if(fd < MIN_INTERNAL_FD) { return false; } - const auto pos = fd - INTERNAL_FD_BASE; + const auto pos = fd - MIN_INTERNAL_FD; std::lock_guard lock(internal_fds_mutex_); return !internal_fds_.test(pos); } +void +PreloadContext::protect_user_fds() { + LOG(DEBUG, "Protecting application fds [{}, {}]", 0, MAX_USER_FDS - 1); + + const int nullfd = ::syscall_no_intercept(SYS_open, "/dev/null", O_RDONLY); + assert(::syscall_error_code(nullfd) == 0); + protected_fds_.set(nullfd); + + const auto fd_is_open = [](int fd) -> bool { + const int ret = ::syscall_no_intercept(SYS_fcntl, fd, F_GETFD); + return ::syscall_error_code(ret) == 0 || + ::syscall_error_code(ret) != EBADF; + }; + + for(int fd = 0; fd < MAX_USER_FDS; ++fd) { + if(fd_is_open(fd)) { + LOG(DEBUG, " fd {} was already in use, skipping", fd); + continue; + } + + 
const int ret = ::syscall_no_intercept(SYS_dup3, nullfd, fd, O_CLOEXEC); + assert(::syscall_error_code(ret) == 0); + protected_fds_.set(fd); + } + + internal_fds_must_relocate_ = false; +} + +void +PreloadContext::unprotect_user_fds() { + + for(std::size_t fd = 0; fd < protected_fds_.size(); ++fd) { + if(!protected_fds_[fd]) { + continue; + } + + const int ret = + ::syscall_error_code(::syscall_no_intercept(SYS_close, fd)); + + if(ret != 0) { + LOG(ERROR, "Failed to unprotect fd") + } + } + + internal_fds_must_relocate_ = true; +} -- GitLab From 9464ffd379d6eab4c0fab91a0e4643322feaf465 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 7 Nov 2019 17:51:56 +0100 Subject: [PATCH 58/71] Adding "none" to LIBGKFS_LOG disables previous flags --- src/client/logging.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/client/logging.cpp b/src/client/logging.cpp index ab9ca1111..c6457533f 100644 --- a/src/client/logging.cpp +++ b/src/client/logging.cpp @@ -140,6 +140,12 @@ process_log_options(const std::string gkfs_debug) { bool is_known = false; for(const auto& opt : debug_opts) { + + // none disables any future and previous flags observed + if(t == "none") { + return log::none; + } + if(t == opt.name_) { dm |= opt.mask_; is_known = true; -- GitLab From 473ef38e4059673ee09e18e376d4fdd4e587a06a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 7 Nov 2019 23:26:29 +0100 Subject: [PATCH 59/71] Specific syscalls can now be removed from log output --- README.md | 5 + include/client/env.hpp | 15 +- include/client/logging.hpp | 15 +- include/client/syscalls/decoder.hpp | 4 +- include/client/syscalls/detail/syscall_info.h | 5 + include/client/syscalls/syscall.hpp | 12 +- src/client/logging.cpp | 46 +- src/client/preload_context.cpp | 8 +- src/client/syscalls/detail/syscall_info.c | 393 +++++++++++++++++- 9 files changed, 489 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index a960d97d9..4918f429c 100644 --- a/README.md +++ b/README.md 
@@ -169,6 +169,11 @@ The following modules are available: - `all`: All previous options combined. - `help`: Print a help message and exit. +When tracing system calls, specific syscalls can be removed from log messages by +setting the `LIBGKFS_LOG_SYSCALL_FILTER` environment variable. For instance, +setting it to `LIBGKFS_LOG_SYSCALL_FILTER=epoll_wait,epoll_create` will filter +out any log entries from the `epoll_wait()` and `epoll_create()` system calls. + Additionally, setting the `LIBGKFS_LOG_OUTPUT_TRUNC` environment variable with a value different from `0` will instruct the logging subsystem to truncate the file used for logging, rather than append to it. diff --git a/include/client/env.hpp b/include/client/env.hpp index 61f92981b..47e4b3692 100644 --- a/include/client/env.hpp +++ b/include/client/env.hpp @@ -22,11 +22,16 @@ namespace gkfs { namespace env { -static constexpr auto LOG = ADD_PREFIX("LOG"); -static constexpr auto LOG_OUTPUT = ADD_PREFIX("LOG_OUTPUT"); -static constexpr auto LOG_OUTPUT_TRUNC = ADD_PREFIX("LOG_OUTPUT_TRUNC"); -static constexpr auto CWD = ADD_PREFIX("CWD"); -static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); +static constexpr auto LOG = ADD_PREFIX("LOG"); + +#ifdef GKFS_DEBUG_BUILD +static constexpr auto LOG_SYSCALL_FILTER = ADD_PREFIX("LOG_SYSCALL_FILTER"); +#endif + +static constexpr auto LOG_OUTPUT = ADD_PREFIX("LOG_OUTPUT"); +static constexpr auto LOG_OUTPUT_TRUNC = ADD_PREFIX("LOG_OUTPUT_TRUNC"); +static constexpr auto CWD = ADD_PREFIX("CWD"); +static constexpr auto HOSTS_FILE = ADD_PREFIX("HOSTS_FILE"); } // namespace env } // namespace gkfs diff --git a/include/client/logging.hpp b/include/client/logging.hpp index 9f682eec7..2fe5c8945 100644 --- a/include/client/logging.hpp +++ b/include/client/logging.hpp @@ -24,6 +24,10 @@ #include #include +#ifdef GKFS_DEBUG_BUILD +#include +#endif + namespace gkfs { namespace log { @@ -264,7 +268,11 @@ struct logger { logger(const std::string& opts, const std::string& path, - bool
trunc); + bool trunc, +#ifdef GKFS_DEBUG_BUILD + const std::string& filter +#endif + ); ~logger(); @@ -397,6 +405,11 @@ struct logger { int log_fd_; log_level log_mask_; + +#ifdef GKFS_DEBUG_BUILD + std::bitset<512> filtered_syscalls_; +#endif + const date::time_zone * const timezone_; }; diff --git a/include/client/syscalls/decoder.hpp b/include/client/syscalls/decoder.hpp index cd69711bd..d7068962d 100644 --- a/include/client/syscalls/decoder.hpp +++ b/include/client/syscalls/decoder.hpp @@ -45,7 +45,7 @@ decode(FmtBuffer& buffer, detail::errno_saver _(errno); - const auto sc = lookup_syscall(syscall_number, argv); + const auto sc = lookup_by_number(syscall_number, argv); fmt::format_to(buffer, "{}(", sc.name()); @@ -71,7 +71,7 @@ decode(FmtBuffer& buffer, detail::errno_saver _(errno); - const auto sc = lookup_syscall(syscall_number, argv); + const auto sc = lookup_by_number(syscall_number, argv); fmt::format_to(buffer, "{}(", sc.name()); diff --git a/include/client/syscalls/detail/syscall_info.h b/include/client/syscalls/detail/syscall_info.h index 0ce0c6420..327cd9dbc 100644 --- a/include/client/syscalls/detail/syscall_info.h +++ b/include/client/syscalls/detail/syscall_info.h @@ -66,9 +66,14 @@ struct syscall_info { }; extern const struct syscall_info syscall_table[]; + extern const struct syscall_info* get_syscall_info(const long syscall_number, const long* argv); + +extern const struct syscall_info* +get_syscall_info_by_name(const char* syscall_name); + extern bool syscall_never_returns(long); diff --git a/include/client/syscalls/syscall.hpp b/include/client/syscalls/syscall.hpp index 7865dd608..8bd0b1a66 100644 --- a/include/client/syscalls/syscall.hpp +++ b/include/client/syscalls/syscall.hpp @@ -62,21 +62,27 @@ struct descriptor : private ::syscall_info { }; static inline descriptor -lookup_syscall(const long syscall_number) { +lookup_by_number(const long syscall_number) { const auto* info = ::get_syscall_info(syscall_number, nullptr); return 
*reinterpret_cast(info); } static inline descriptor -lookup_syscall(const long syscall_number, +lookup_by_number(const long syscall_number, const long argv[MAX_ARGS]) { const auto* info = ::get_syscall_info(syscall_number, argv); return *reinterpret_cast(info); } +static inline descriptor +lookup_by_name(const std::string syscall_name) { + const auto* info = ::get_syscall_info_by_name(syscall_name.c_str()); + return *reinterpret_cast(info); +} + static inline bool never_returns(const long syscall_number) { - const auto desc = lookup_syscall(syscall_number); + const auto desc = lookup_by_number(syscall_number); return desc.return_type() == ret::none; } diff --git a/src/client/logging.cpp b/src/client/logging.cpp index c6457533f..19a2fe594 100644 --- a/src/client/logging.cpp +++ b/src/client/logging.cpp @@ -192,16 +192,54 @@ process_log_options(const std::string gkfs_debug) { return dm; } +#ifdef GKFS_DEBUG_BUILD +std::bitset<512> +process_log_filter(const std::string& log_filter) { + + std::bitset<512> filtered_syscalls; + std::vector tokens; + + if(log_filter.empty()) { + return filtered_syscalls; + } + + // skip separating white spaces and commas + boost::split(tokens, log_filter, + [](char c) { return c == ' ' || c == ','; }); + + for(const auto& t : tokens) { + const auto sc = syscall::lookup_by_name(t); + + if(std::strcmp(sc.name(), "unknown_syscall") == 0) { + logger::log_message(stdout, "warning: system call '{}' unknown; " + "will not filter", t); + continue; + } + + filtered_syscalls.set(sc.number()); + } + + return filtered_syscalls; +} +#endif // GKFS_DEBUG_BUILD logger::logger(const std::string& opts, const std::string& path, - bool trunc) : + bool trunc, +#ifdef GKFS_DEBUG_BUILD + const std::string& filter +#endif + ) : timezone_(date::current_zone()) { /* use stderr by default */ log_fd_ = 2; log_mask_ = process_log_options(opts); +#ifdef GKFS_DEBUG_BUILD + filtered_syscalls_ = process_log_filter(filter); +#endif + if(!path.empty()) { int flags = 
O_CREAT | O_RDWR | O_APPEND | O_TRUNC; @@ -265,6 +303,12 @@ logger::log_syscall(syscall::info info, return; } +#ifdef GKFS_DEBUG_BUILD + if(filtered_syscalls_[syscall_number]) { + return; + } +#endif + // log the syscall even if we don't have information on it, since it may // be important to the user (we assume that the syscall has completed // though) diff --git a/src/client/preload_context.cpp b/src/client/preload_context.cpp index b63c0f3cf..485a3b212 100644 --- a/src/client/preload_context.cpp +++ b/src/client/preload_context.cpp @@ -50,12 +50,18 @@ PreloadContext::init_logging() { const std::string log_output = gkfs::env::get_var(gkfs::env::LOG_OUTPUT, DEFAULT_CLIENT_LOG_PATH); +#ifdef GKFS_DEBUG_BUILD + const std::string log_filter = + gkfs::env::get_var(gkfs::env::LOG_SYSCALL_FILTER, ""); +#endif + const std::string trunc_val = gkfs::env::get_var(gkfs::env::LOG_OUTPUT_TRUNC); const bool log_trunc = !(!trunc_val.empty() && trunc_val[0] == 0); - gkfs::log::create_global_logger(log_opts, log_output, log_trunc); + gkfs::log::create_global_logger(log_opts, log_output, + log_trunc, log_filter); } void PreloadContext::mountdir(const std::string& path) { diff --git a/src/client/syscalls/detail/syscall_info.c b/src/client/syscalls/detail/syscall_info.c index d5fb0e008..9f7008ebc 100644 --- a/src/client/syscalls/detail/syscall_info.c +++ b/src/client/syscalls/detail/syscall_info.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -420,7 +422,6 @@ const struct syscall_info syscall_table[] = { #endif // SYS_rseq }; - static const struct syscall_info unknown_syscall = { .s_name = "unknown_syscall", .s_nargs = MAX_SYSCALL_ARGS, @@ -510,6 +511,395 @@ get_syscall_info(const long syscall_number, return &syscall_table[syscall_number]; } +struct named_syscall_entry { + const char * s_name; + const struct syscall_info * s_info; +}; + +#define SYSCALL_BY_NAME(id) \ +{ \ + .s_name = #id, \ + .s_info = 
&syscall_table[SYS_##id] \ +} + +/** Linux syscalls ordered by name */ +const struct named_syscall_entry syscalls_by_name[] = { + SYSCALL_BY_NAME(_sysctl), + SYSCALL_BY_NAME(accept), + SYSCALL_BY_NAME(accept4), + SYSCALL_BY_NAME(access), + SYSCALL_BY_NAME(acct), + SYSCALL_BY_NAME(add_key), + SYSCALL_BY_NAME(adjtimex), + SYSCALL_BY_NAME(afs_syscall), + SYSCALL_BY_NAME(alarm), + SYSCALL_BY_NAME(arch_prctl), + SYSCALL_BY_NAME(bind), +#ifdef SYS_bpf + SYSCALL_BY_NAME(bpf), +#endif // SYS_bpf + SYSCALL_BY_NAME(brk), + SYSCALL_BY_NAME(capget), + SYSCALL_BY_NAME(capset), + SYSCALL_BY_NAME(chdir), + SYSCALL_BY_NAME(chmod), + SYSCALL_BY_NAME(chown), + SYSCALL_BY_NAME(chroot), + SYSCALL_BY_NAME(clock_adjtime), + SYSCALL_BY_NAME(clock_getres), + SYSCALL_BY_NAME(clock_gettime), + SYSCALL_BY_NAME(clock_nanosleep), + SYSCALL_BY_NAME(clock_settime), + SYSCALL_BY_NAME(clone), + SYSCALL_BY_NAME(close), + SYSCALL_BY_NAME(connect), + SYSCALL_BY_NAME(copy_file_range), + SYSCALL_BY_NAME(creat), + SYSCALL_BY_NAME(create_module), + SYSCALL_BY_NAME(delete_module), + SYSCALL_BY_NAME(dup), + SYSCALL_BY_NAME(dup2), + SYSCALL_BY_NAME(dup3), + SYSCALL_BY_NAME(epoll_create), + SYSCALL_BY_NAME(epoll_create1), + SYSCALL_BY_NAME(epoll_ctl), + SYSCALL_BY_NAME(epoll_ctl_old), + SYSCALL_BY_NAME(epoll_pwait), + SYSCALL_BY_NAME(epoll_wait), + SYSCALL_BY_NAME(epoll_wait_old), + SYSCALL_BY_NAME(eventfd), + SYSCALL_BY_NAME(eventfd2), + SYSCALL_BY_NAME(execve), +#ifdef SYS_execveat + SYSCALL_BY_NAME(execveat), +#endif // SYS_execveat + SYSCALL_BY_NAME(exit), + SYSCALL_BY_NAME(exit_group), + SYSCALL_BY_NAME(faccessat), + SYSCALL_BY_NAME(fadvise64), + SYSCALL_BY_NAME(fallocate), + SYSCALL_BY_NAME(fanotify_init), + SYSCALL_BY_NAME(fanotify_mark), + SYSCALL_BY_NAME(fchdir), + SYSCALL_BY_NAME(fchmod), + SYSCALL_BY_NAME(fchmodat), + SYSCALL_BY_NAME(fchown), + SYSCALL_BY_NAME(fchownat), + SYSCALL_BY_NAME(fcntl), + SYSCALL_BY_NAME(fdatasync), + SYSCALL_BY_NAME(fgetxattr), + SYSCALL_BY_NAME(finit_module), + 
SYSCALL_BY_NAME(flistxattr), + SYSCALL_BY_NAME(flock), + SYSCALL_BY_NAME(fork), + SYSCALL_BY_NAME(fremovexattr), + SYSCALL_BY_NAME(fsetxattr), + SYSCALL_BY_NAME(fstat), + SYSCALL_BY_NAME(fstatfs), + SYSCALL_BY_NAME(fsync), + SYSCALL_BY_NAME(ftruncate), + SYSCALL_BY_NAME(futex), + SYSCALL_BY_NAME(futimesat), + SYSCALL_BY_NAME(get_kernel_syms), + SYSCALL_BY_NAME(get_mempolicy), + SYSCALL_BY_NAME(get_robust_list), + SYSCALL_BY_NAME(get_thread_area), + SYSCALL_BY_NAME(getcpu), + SYSCALL_BY_NAME(getcwd), + SYSCALL_BY_NAME(getdents), + SYSCALL_BY_NAME(getdents64), + SYSCALL_BY_NAME(getegid), + SYSCALL_BY_NAME(geteuid), + SYSCALL_BY_NAME(getgid), + SYSCALL_BY_NAME(getgroups), + SYSCALL_BY_NAME(getitimer), + SYSCALL_BY_NAME(getpeername), + SYSCALL_BY_NAME(getpgid), + SYSCALL_BY_NAME(getpgrp), + SYSCALL_BY_NAME(getpid), + SYSCALL_BY_NAME(getpmsg), + SYSCALL_BY_NAME(getppid), + SYSCALL_BY_NAME(getpriority), + SYSCALL_BY_NAME(getrandom), + SYSCALL_BY_NAME(getresgid), + SYSCALL_BY_NAME(getresuid), + SYSCALL_BY_NAME(getrlimit), + SYSCALL_BY_NAME(getrusage), + SYSCALL_BY_NAME(getsid), + SYSCALL_BY_NAME(getsockname), + SYSCALL_BY_NAME(getsockopt), + SYSCALL_BY_NAME(gettid), + SYSCALL_BY_NAME(gettimeofday), + SYSCALL_BY_NAME(getuid), + SYSCALL_BY_NAME(getxattr), + SYSCALL_BY_NAME(init_module), + SYSCALL_BY_NAME(inotify_add_watch), + SYSCALL_BY_NAME(inotify_init), + SYSCALL_BY_NAME(inotify_init1), + SYSCALL_BY_NAME(inotify_rm_watch), + SYSCALL_BY_NAME(io_cancel), + SYSCALL_BY_NAME(io_destroy), + SYSCALL_BY_NAME(io_getevents), +#ifdef SYS_io_pgetevents + SYSCALL_BY_NAME(io_pgetevents), +#endif // SYS_io_pgetevents + SYSCALL_BY_NAME(io_setup), + SYSCALL_BY_NAME(io_submit), + SYSCALL_BY_NAME(ioctl), + SYSCALL_BY_NAME(ioperm), + SYSCALL_BY_NAME(iopl), + SYSCALL_BY_NAME(ioprio_get), + SYSCALL_BY_NAME(ioprio_set), + SYSCALL_BY_NAME(kcmp), + SYSCALL_BY_NAME(kexec_file_load), + SYSCALL_BY_NAME(kexec_load), + SYSCALL_BY_NAME(keyctl), + SYSCALL_BY_NAME(kill), + SYSCALL_BY_NAME(lchown), + 
SYSCALL_BY_NAME(lgetxattr), + SYSCALL_BY_NAME(link), + SYSCALL_BY_NAME(linkat), + SYSCALL_BY_NAME(listen), + SYSCALL_BY_NAME(listxattr), + SYSCALL_BY_NAME(llistxattr), + SYSCALL_BY_NAME(lookup_dcookie), + SYSCALL_BY_NAME(lremovexattr), + SYSCALL_BY_NAME(lseek), + SYSCALL_BY_NAME(lsetxattr), + SYSCALL_BY_NAME(lstat), + SYSCALL_BY_NAME(madvise), + SYSCALL_BY_NAME(mbind), +#ifdef SYS_membarrier + SYSCALL_BY_NAME(membarrier), +#endif // SYS_membarrier + SYSCALL_BY_NAME(memfd_create), + SYSCALL_BY_NAME(migrate_pages), + SYSCALL_BY_NAME(mincore), + SYSCALL_BY_NAME(mkdir), + SYSCALL_BY_NAME(mkdirat), + SYSCALL_BY_NAME(mknod), + SYSCALL_BY_NAME(mknodat), + SYSCALL_BY_NAME(mlock), +#ifdef SYS_mlock2 + SYSCALL_BY_NAME(mlock2), +#endif // SYS_mlock2 + SYSCALL_BY_NAME(mlockall), + SYSCALL_BY_NAME(mmap), + SYSCALL_BY_NAME(modify_ldt), + SYSCALL_BY_NAME(mount), + SYSCALL_BY_NAME(move_pages), + SYSCALL_BY_NAME(mprotect), + SYSCALL_BY_NAME(mq_getsetattr), + SYSCALL_BY_NAME(mq_notify), + SYSCALL_BY_NAME(mq_open), + SYSCALL_BY_NAME(mq_timedreceive), + SYSCALL_BY_NAME(mq_timedsend), + SYSCALL_BY_NAME(mq_unlink), + SYSCALL_BY_NAME(mremap), + SYSCALL_BY_NAME(msgctl), + SYSCALL_BY_NAME(msgget), + SYSCALL_BY_NAME(msgrcv), + SYSCALL_BY_NAME(msgsnd), + SYSCALL_BY_NAME(msync), + SYSCALL_BY_NAME(munlock), + SYSCALL_BY_NAME(munlockall), + SYSCALL_BY_NAME(munmap), + SYSCALL_BY_NAME(name_to_handle_at), + SYSCALL_BY_NAME(nanosleep), + SYSCALL_BY_NAME(newfstatat), + SYSCALL_BY_NAME(nfsservctl), + SYSCALL_BY_NAME(open), + SYSCALL_BY_NAME(open_by_handle_at), + SYSCALL_BY_NAME(openat), + SYSCALL_BY_NAME(pause), + SYSCALL_BY_NAME(perf_event_open), + SYSCALL_BY_NAME(personality), + SYSCALL_BY_NAME(pipe), + SYSCALL_BY_NAME(pipe2), + SYSCALL_BY_NAME(pivot_root), + SYSCALL_BY_NAME(pkey_alloc), /* pkey_* entries belong here: the table must stay strcmp()-sorted because get_syscall_info_by_name() uses bsearch() */ + SYSCALL_BY_NAME(pkey_free), + SYSCALL_BY_NAME(pkey_mprotect), + SYSCALL_BY_NAME(poll), + SYSCALL_BY_NAME(ppoll), + SYSCALL_BY_NAME(prctl), + SYSCALL_BY_NAME(pread64), + SYSCALL_BY_NAME(preadv), +#ifdef SYS_preadv2 + SYSCALL_BY_NAME(preadv2), +#endif // SYS_preadv2 +
SYSCALL_BY_NAME(prlimit64), + SYSCALL_BY_NAME(process_vm_readv), + SYSCALL_BY_NAME(process_vm_writev), + SYSCALL_BY_NAME(pselect6), + SYSCALL_BY_NAME(ptrace), + SYSCALL_BY_NAME(putpmsg), + SYSCALL_BY_NAME(pwrite64), + SYSCALL_BY_NAME(pwritev), +#ifdef SYS_pwritev2 + SYSCALL_BY_NAME(pwritev2), +#endif // SYS_pwritev2 + SYSCALL_BY_NAME(query_module), + SYSCALL_BY_NAME(quotactl), + SYSCALL_BY_NAME(read), + SYSCALL_BY_NAME(readahead), + SYSCALL_BY_NAME(readlink), + SYSCALL_BY_NAME(readlinkat), + SYSCALL_BY_NAME(readv), + SYSCALL_BY_NAME(reboot), + SYSCALL_BY_NAME(recvfrom), + SYSCALL_BY_NAME(recvmmsg), + SYSCALL_BY_NAME(recvmsg), + SYSCALL_BY_NAME(remap_file_pages), + SYSCALL_BY_NAME(removexattr), + SYSCALL_BY_NAME(rename), + SYSCALL_BY_NAME(renameat), + SYSCALL_BY_NAME(renameat2), + SYSCALL_BY_NAME(request_key), + SYSCALL_BY_NAME(restart_syscall), + SYSCALL_BY_NAME(rmdir), +#ifdef SYS_rseq + SYSCALL_BY_NAME(rseq), +#endif // SYS_rseq + SYSCALL_BY_NAME(rt_sigaction), + SYSCALL_BY_NAME(rt_sigpending), + SYSCALL_BY_NAME(rt_sigprocmask), + SYSCALL_BY_NAME(rt_sigqueueinfo), + SYSCALL_BY_NAME(rt_sigreturn), + SYSCALL_BY_NAME(rt_sigsuspend), + SYSCALL_BY_NAME(rt_sigtimedwait), + SYSCALL_BY_NAME(rt_tgsigqueueinfo), + SYSCALL_BY_NAME(sched_get_priority_max), + SYSCALL_BY_NAME(sched_get_priority_min), + SYSCALL_BY_NAME(sched_getaffinity), + SYSCALL_BY_NAME(sched_getattr), + SYSCALL_BY_NAME(sched_getparam), + SYSCALL_BY_NAME(sched_getscheduler), + SYSCALL_BY_NAME(sched_rr_get_interval), + SYSCALL_BY_NAME(sched_setaffinity), + SYSCALL_BY_NAME(sched_setattr), + SYSCALL_BY_NAME(sched_setparam), + SYSCALL_BY_NAME(sched_setscheduler), + SYSCALL_BY_NAME(sched_yield), + SYSCALL_BY_NAME(seccomp), + SYSCALL_BY_NAME(security), + SYSCALL_BY_NAME(select), + SYSCALL_BY_NAME(semctl), + SYSCALL_BY_NAME(semget), + SYSCALL_BY_NAME(semop), + SYSCALL_BY_NAME(semtimedop), + SYSCALL_BY_NAME(sendfile), + SYSCALL_BY_NAME(sendmmsg), +
SYSCALL_BY_NAME(sendmsg), + SYSCALL_BY_NAME(sendto), + SYSCALL_BY_NAME(set_mempolicy), + SYSCALL_BY_NAME(set_robust_list), + SYSCALL_BY_NAME(set_thread_area), + SYSCALL_BY_NAME(set_tid_address), + SYSCALL_BY_NAME(setdomainname), + SYSCALL_BY_NAME(setfsgid), + SYSCALL_BY_NAME(setfsuid), + SYSCALL_BY_NAME(setgid), + SYSCALL_BY_NAME(setgroups), + SYSCALL_BY_NAME(sethostname), + SYSCALL_BY_NAME(setitimer), + SYSCALL_BY_NAME(setns), + SYSCALL_BY_NAME(setpgid), + SYSCALL_BY_NAME(setpriority), + SYSCALL_BY_NAME(setregid), + SYSCALL_BY_NAME(setresgid), + SYSCALL_BY_NAME(setresuid), + SYSCALL_BY_NAME(setreuid), + SYSCALL_BY_NAME(setrlimit), + SYSCALL_BY_NAME(setsid), + SYSCALL_BY_NAME(setsockopt), + SYSCALL_BY_NAME(settimeofday), + SYSCALL_BY_NAME(setuid), + SYSCALL_BY_NAME(setxattr), + SYSCALL_BY_NAME(shmat), + SYSCALL_BY_NAME(shmctl), + SYSCALL_BY_NAME(shmdt), + SYSCALL_BY_NAME(shmget), + SYSCALL_BY_NAME(shutdown), + SYSCALL_BY_NAME(sigaltstack), + SYSCALL_BY_NAME(signalfd), + SYSCALL_BY_NAME(signalfd4), + SYSCALL_BY_NAME(socket), + SYSCALL_BY_NAME(socketpair), + SYSCALL_BY_NAME(splice), + SYSCALL_BY_NAME(stat), + SYSCALL_BY_NAME(statfs), +#ifdef SYS_statx + SYSCALL_BY_NAME(statx), +#endif // SYS_statx + SYSCALL_BY_NAME(swapoff), + SYSCALL_BY_NAME(swapon), + SYSCALL_BY_NAME(symlink), + SYSCALL_BY_NAME(symlinkat), + SYSCALL_BY_NAME(sync), + SYSCALL_BY_NAME(sync_file_range), + SYSCALL_BY_NAME(syncfs), + SYSCALL_BY_NAME(sysfs), + SYSCALL_BY_NAME(sysinfo), + SYSCALL_BY_NAME(syslog), + SYSCALL_BY_NAME(tee), + SYSCALL_BY_NAME(tgkill), + SYSCALL_BY_NAME(time), + SYSCALL_BY_NAME(timer_create), + SYSCALL_BY_NAME(timer_delete), + SYSCALL_BY_NAME(timer_getoverrun), + SYSCALL_BY_NAME(timer_gettime), + SYSCALL_BY_NAME(timer_settime), + SYSCALL_BY_NAME(timerfd_create), + SYSCALL_BY_NAME(timerfd_gettime), + SYSCALL_BY_NAME(timerfd_settime), + SYSCALL_BY_NAME(times), + SYSCALL_BY_NAME(tkill), + SYSCALL_BY_NAME(truncate), + SYSCALL_BY_NAME(tuxcall), + SYSCALL_BY_NAME(umask), + 
SYSCALL_BY_NAME(umount2), + SYSCALL_BY_NAME(uname), + SYSCALL_BY_NAME(unlink), + SYSCALL_BY_NAME(unlinkat), + SYSCALL_BY_NAME(unshare), + SYSCALL_BY_NAME(uselib), + SYSCALL_BY_NAME(userfaultfd), + SYSCALL_BY_NAME(ustat), + SYSCALL_BY_NAME(utime), + SYSCALL_BY_NAME(utimensat), + SYSCALL_BY_NAME(utimes), + SYSCALL_BY_NAME(vfork), + SYSCALL_BY_NAME(vhangup), + SYSCALL_BY_NAME(vmsplice), + SYSCALL_BY_NAME(vserver), + SYSCALL_BY_NAME(wait4), + SYSCALL_BY_NAME(waitid), + SYSCALL_BY_NAME(write), + SYSCALL_BY_NAME(writev), +}; + +static int +compare_named_entries(const void *k, const void *e) { + const char* name = (const char*) k; + struct named_syscall_entry* entry = (struct named_syscall_entry*) e; + return strcmp(name, entry->s_name); +} + +const struct syscall_info * +get_syscall_info_by_name(const char* syscall_name) { + + struct named_syscall_entry* res = + bsearch(syscall_name, &syscalls_by_name[0], ARRAY_SIZE(syscalls_by_name), + sizeof(struct named_syscall_entry), compare_named_entries); + + if(res == NULL) { + return &unknown_syscall; + } + + return res->s_info; +} + #define RETURN_TYPE(scinfo) \ (scinfo)->s_return_type.r_type @@ -525,4 +915,5 @@ syscall_never_returns(long syscall_number) { #undef S_UARG #undef S_NARG #undef S_RET +#undef SYSCALL_BY_NAME #undef ARRAY_SIZE -- GitLab From 45657e43b2b51eb00de0fc7e535a0b1257bea0c0 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 18 Nov 2019 15:11:15 +0100 Subject: [PATCH 60/71] Ensure engine_options are initialized correctly Fixes #47 --- src/client/preload.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/preload.cpp b/src/client/preload.cpp index 547c97ab4..9503ee31c 100644 --- a/src/client/preload.cpp +++ b/src/client/preload.cpp @@ -96,7 +96,8 @@ bool init_hermes_client(const std::string& transport_prefix) { try { - hermes::engine_options opts; + hermes::engine_options opts{}; + #if USE_SHM opts |= hermes::use_auto_sm; #endif -- GitLab From 
e373501b285b2f0d1340f32ae1f79dc2bbbe27b5 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 18 Nov 2019 15:12:32 +0100 Subject: [PATCH 61/71] Update Hermes submodule --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 2e578554d..66a421d09 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 2e578554d52d734eec83b5def2602dde7b6ce570 +Subproject commit 66a421d0994fa3e2861e1a7a5dafccf9f0bdeb36 -- GitLab From ab6be5f86423ea245b8fb6efe65c28800e358754 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 18 Nov 2019 15:31:31 +0100 Subject: [PATCH 62/71] Enable SM in Mercury deps for testing --- scripts/compile_dep.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/compile_dep.sh b/scripts/compile_dep.sh index a3d4edd9b..c10b0f1b8 100755 --- a/scripts/compile_dep.sh +++ b/scripts/compile_dep.sh @@ -298,10 +298,10 @@ if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then prepare_build_dir ${CURR} cd ${CURR}/build PKG_CONFIG_PATH=${INSTALL}/lib/pkgconfig $CMAKE \ - -DCMAKE_BUILD_TYPE:STRING=Debug \ + -DCMAKE_BUILD_TYPE:STRING=Release \ -DBUILD_TESTING:BOOL=ON \ - -DMERCURY_USE_SM_ROUTING:BOOL=OFF \ - -DMERCURY_USE_SELF_FORWARD:BOOL=OFF \ + -DMERCURY_USE_SM_ROUTING:BOOL=ON \ + -DMERCURY_USE_SELF_FORWARD:BOOL=ON \ -DMERCURY_USE_CHECKSUMS:BOOL=OFF \ -DMERCURY_USE_BOOST_PP:BOOL=ON \ -DMERCURY_USE_EAGER_BULK:BOOL=ON \ -- GitLab From a6f96df02fec4e46cdf0c088b7281b303cc4bde2 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 19 Nov 2019 11:58:49 +0100 Subject: [PATCH 63/71] Update hermes submodule --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 66a421d09..32356080b 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 66a421d0994fa3e2861e1a7a5dafccf9f0bdeb36 +Subproject commit 
32356080bcb1b089957252e25ff3dceb715b572b -- GitLab From b06ed34593cdf20f314b2894a36f1fa4f5a4d46e Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 19 Nov 2019 12:41:05 +0100 Subject: [PATCH 64/71] Create CI artifacts if tests fail --- .gitlab-ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2972085e0..276a18419 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,6 +51,7 @@ compile GekkoFS: -Wdev -Wdeprecate -DCMAKE_BUILD_TYPE=Debug + -DRPC_PROTOCOL="ofi+sockets" -DCMAKE_PREFIX_PATH=${DEPS_INSTALL_PATH} -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} ${CI_PROJECT_DIR} @@ -79,6 +80,7 @@ test wr: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_wr artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -90,6 +92,7 @@ test directories: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_dir artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -101,6 +104,7 @@ test truncate: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_truncate artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -112,6 +116,7 @@ test path resolution: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_path_resolution artifacts: + when: on_failure paths: - "${LOG_PATH}" @@ -123,6 +128,6 @@ test lseek: - sleep 4 - LD_PRELOAD=${INSTALL_PATH}/lib/libgkfs_intercept.so ${TESTS_BUILD_PATH}/gkfs_test_lseek artifacts: + when: on_failure paths: - "${LOG_PATH}" - -- GitLab From 1775272935147d359d4258ae10748133a906c41a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 21 Nov 2019 11:06:18 +0100 Subject: [PATCH 65/71] Additional debug messages --- src/client/preload_util.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/client/preload_util.cpp b/src/client/preload_util.cpp index 155a9a821..32dd3ede3 100644 --- a/src/client/preload_util.cpp +++ 
b/src/client/preload_util.cpp @@ -191,6 +191,8 @@ void load_hosts() { CTX->local_host_id(id); local_host_found = true; } + + LOG(DEBUG, "Found peer: {}", addrs[id].to_string()); } if (!local_host_found) { -- GitLab From 579997533b991c44bfe63531ce333953d06cd891 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 21 Nov 2019 11:11:22 +0100 Subject: [PATCH 66/71] Update Hermes submodule --- external/hermes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/hermes b/external/hermes index 32356080b..0c62b3319 160000 --- a/external/hermes +++ b/external/hermes @@ -1 +1 @@ -Subproject commit 32356080bcb1b089957252e25ff3dceb715b572b +Subproject commit 0c62b3319e660a5a30d0ad24a8ddaec8924b6388 -- GitLab From 6a21e250da3e03b13c5491f86e5158f1fffbf1ce Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 21 Nov 2019 11:07:37 +0100 Subject: [PATCH 67/71] Fix plugins if building standalone Mercury --- scripts/compile_dep.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/compile_dep.sh b/scripts/compile_dep.sh index c10b0f1b8..17847159c 100755 --- a/scripts/compile_dep.sh +++ b/scripts/compile_dep.sh @@ -293,6 +293,15 @@ fi # Mercury if [[ ( "${DEPENDENCY}" == "" ) || ( "${DEPENDENCY}" == "mercury" ) ]]; then + + if [ "$NA_LAYER" == "bmi" ] || [ "$NA_LAYER" == "all" ]; then + USE_BMI="-DNA_USE_BMI:BOOL=ON" + fi + + if [ "$NA_LAYER" == "ofi" ] || [ "$NA_LAYER" == "all" ]; then + USE_OFI="-DNA_USE_OFI:BOOL=ON" + fi + echo "############################################################ Installing: Mercury" CURR=${SOURCE}/mercury prepare_build_dir ${CURR} -- GitLab From e65f4053cc3550e923f24c0be613c46bd3eff99e Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 21 Nov 2019 16:47:46 +0100 Subject: [PATCH 68/71] Handle eventfd/eventfd2 in internal syscalls Fixes #63. 
--- src/client/intercept.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index d00896c90..1d2afeba6 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -282,6 +282,27 @@ hook_internal(long syscall_number, break; + case SYS_eventfd: + + *result = syscall_no_intercept(syscall_number, + static_cast(arg0)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + + case SYS_eventfd2: + + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1)); + + if(*result >= 0) { + *result = CTX->register_internal_fd(*result); + } + break; + case SYS_accept: *result = syscall_no_intercept(syscall_number, static_cast(arg0), -- GitLab From 4f45961831bbdf4b069b67373354178174b6ec31 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Thu, 21 Nov 2019 16:56:44 +0100 Subject: [PATCH 69/71] Handle extra internal fds passed in recvmsg() Fixes #64. --- src/client/intercept.cpp | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 1d2afeba6..a0b6309ce 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -303,6 +305,43 @@ hook_internal(long syscall_number, } break; + case SYS_recvmsg: + { + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + reinterpret_cast(arg1), + static_cast(arg2)); + + // The recvmsg() syscall can receive file descriptors from another + // process that the kernel automatically adds to the client's fds + // as if dup2 had been called. 
Whenever that happens, we need to + // make sure that we register these additional fds as internal, or + // we could inadvertently overwrite them + if(*result >= 0) { + auto* hdr = reinterpret_cast(arg1); + struct cmsghdr* cmsg = CMSG_FIRSTHDR(hdr); + + for(; cmsg != NULL; cmsg = CMSG_NXTHDR(hdr, cmsg)) { + if(cmsg->cmsg_type == SCM_RIGHTS) { + + size_t nfd = cmsg->cmsg_len > CMSG_LEN(0) ? + (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int) : + 0; + + const int* fds = + reinterpret_cast(CMSG_DATA(cmsg)); + + for(size_t i = 0; i < nfd; ++i) { + LOG(DEBUG, "recvmsg() provided extra fd {}", fds[i]); + CTX->register_internal_fd(fds[i]); + } + } + } + } + + break; + } + case SYS_accept: *result = syscall_no_intercept(syscall_number, static_cast(arg0), -- GitLab From 8acecff400f3aaf9b6017f81f098e1683247c088 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 22 Nov 2019 07:48:56 +0100 Subject: [PATCH 70/71] Handle internal fds created by fcntl() Fixes #65. --- src/client/intercept.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index a0b6309ce..ef7b70b02 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -353,6 +353,18 @@ hook_internal(long syscall_number, } break; + case SYS_fcntl: + *result = syscall_no_intercept(syscall_number, + static_cast(arg0), + static_cast(arg1), + arg2); + + if(*result >= 0 && + (static_cast(arg1) == F_DUPFD || + static_cast(arg1) == F_DUPFD_CLOEXEC)) { + *result = CTX->register_internal_fd(*result); + } + break; case SYS_close: *result = syscall_no_intercept(syscall_number, -- GitLab From 9c423aecbe0139abd345cce11e886ccc03137f84 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 22 Nov 2019 07:50:27 +0100 Subject: [PATCH 71/71] Ensure fds in CMSG_DATA are relocated if needed --- src/client/intercept.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/client/intercept.cpp b/src/client/intercept.cpp index 
ef7b70b02..f4da38937 100644 --- a/src/client/intercept.cpp +++ b/src/client/intercept.cpp @@ -328,12 +328,15 @@ hook_internal(long syscall_number, (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int) : 0; - const int* fds = + int* fds = reinterpret_cast(CMSG_DATA(cmsg)); for(size_t i = 0; i < nfd; ++i) { LOG(DEBUG, "recvmsg() provided extra fd {}", fds[i]); - CTX->register_internal_fd(fds[i]); + + // ensure we update the fds in cmsg + // if they have been relocated + fds[i] = CTX->register_internal_fd(fds[i]); } } } -- GitLab