From 188e436f8e9e69932b4affc2a12115defc68b2ea Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sat, 4 Feb 2023 10:52:44 +0100 Subject: [PATCH 01/23] CMake: Add mochi-thallium as new dependency --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 118c19e2..f0c48487 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,10 @@ find_package(Argobots 1.1 REQUIRED) message(STATUS "[${PROJECT_NAME}] Checking for Margo") find_package(Margo 0.9.6 REQUIRED) +### Thallium +message(STATUS "[${PROJECT_NAME}] Checking for Thallium") +find_package(Thallium REQUIRED) + ### {fmt}: required for sensible output formatting message(STATUS "[${PROJECT_NAME}] Downloading and building {fmt}") FetchContent_Declare( -- GitLab From 31682a7035c63ec216e8caa38db5b8c0e8a787aa Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sat, 4 Feb 2023 10:59:32 +0100 Subject: [PATCH 02/23] rpc_server: Use thallium internally and refactor interface --- examples/cxx/CMakeLists.txt | 2 +- src/common/CMakeLists.txt | 7 +- src/common/net/CMakeLists.txt | 17 ++--- src/common/net/client.cpp | 59 ++++++++++++++++ src/common/net/client.hpp | 50 +++++++++++++ src/common/net/endpoint.cpp | 40 +++++++++++ src/common/net/endpoint.hpp | 82 +++++++++++++++++++++ src/common/net/server.cpp | 129 ++++++++++------------------------ src/common/net/server.hpp | 70 ++++++++---------- src/lib/CMakeLists.txt | 3 +- src/lib/detail/impl.cpp | 4 +- src/scord-ctl/scord-ctl.cpp | 5 +- src/scord/scord.cpp | 6 +- 13 files changed, 324 insertions(+), 150 deletions(-) create mode 100644 src/common/net/client.cpp create mode 100644 src/common/net/client.hpp create mode 100644 src/common/net/endpoint.cpp create mode 100644 src/common/net/endpoint.hpp diff --git a/examples/cxx/CMakeLists.txt b/examples/cxx/CMakeLists.txt index 7c9d6f3c..24d14c57 100644 --- a/examples/cxx/CMakeLists.txt +++ b/examples/cxx/CMakeLists.txt @@ -53,7 +53,7 @@ foreach(example IN LISTS examples_cxx) add_executable(${example}_cxx) target_sources(${example}_cxx PRIVATE ${example}.cpp) target_link_libraries(${example}_cxx - PUBLIC common::network::engine fmt::fmt adm_iosched cxx_examples_common) + PUBLIC fmt::fmt adm_iosched cxx_examples_common) set_target_properties(${example}_cxx PROPERTIES OUTPUT_NAME ${example}) endforeach() diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 01004e96..1ce1ad4b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -38,13 +38,12 @@ target_include_directories(_logger INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) add_library(common::logger ALIAS _logger) add_subdirectory(net) -target_include_directories(_network_engine INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}) -add_library(common::network::engine ALIAS _network_engine) target_include_directories(_rpc_server INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) add_library(common::network::rpc_server ALIAS _rpc_server) - +target_include_directories(_rpc_client INTERFACE + ${CMAKE_CURRENT_SOURCE_DIR}) +add_library(common::network::rpc_client ALIAS _rpc_client) target_include_directories(_rpc_types INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/common/net/CMakeLists.txt b/src/common/net/CMakeLists.txt index 63049547..3e987198 100644 --- a/src/common/net/CMakeLists.txt +++ b/src/common/net/CMakeLists.txt @@ -22,18 +22,15 @@ # SPDX-License-Identifier: GPL-3.0-or-later # ################################################################################ -add_library(_network_engine STATIC) +add_library(_rpc_client 
STATIC) target_sources( - _network_engine - INTERFACE engine.hpp - PRIVATE detail/address.hpp + _rpc_client + INTERFACE endpoint.hpp client.hpp request.hpp + PRIVATE endpoint.cpp client.cpp ) -target_link_libraries( - _network_engine PUBLIC common::logger transport_library Mercury::Mercury - Argobots::Argobots Margo::Margo -) -set_property(TARGET _network_engine PROPERTY POSITION_INDEPENDENT_CODE ON) +target_link_libraries(_rpc_client PUBLIC common::config common::logger thallium) +set_property(TARGET _rpc_client PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(_rpc_server STATIC) target_sources( @@ -42,6 +39,6 @@ target_sources( PRIVATE server.cpp ) -target_link_libraries(_rpc_server PUBLIC common::config _network_engine) +target_link_libraries(_rpc_server PUBLIC common::config common::logger thallium) add_subdirectory(proto) diff --git a/src/common/net/client.cpp b/src/common/net/client.cpp new file mode 100644 index 00000000..6617beac --- /dev/null +++ b/src/common/net/client.cpp @@ -0,0 +1,59 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#include +#include +#include "client.hpp" +#include "endpoint.hpp" + +using namespace std::literals; + +namespace scord::network { + + +client::client(const std::string& protocol) + : m_engine(std::make_shared(protocol, + THALLIUM_CLIENT_MODE)) {} + +std::optional +client::lookup(const std::string& address) noexcept { + try { + return endpoint{m_engine, m_engine->lookup(address)}; + } catch(const std::exception& ex) { + LOGGER_ERROR("client::lookup() failed: {}", ex.what()); + return std::nullopt; + } +} + +std::string +client::self_address() const noexcept { + try { + return m_engine->self(); + } catch(const std::exception& ex) { + LOGGER_ERROR("client::self_address() failed: {}", ex.what()); + return "unknown"s; + } +} + +} // namespace scord::network diff --git a/src/common/net/client.hpp b/src/common/net/client.hpp new file mode 100644 index 00000000..c5877350 --- /dev/null +++ b/src/common/net/client.hpp @@ -0,0 +1,50 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. 
+ * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_CLIENT_HPP +#define SCORD_CLIENT_HPP + +#include +#include + +namespace scord::network { + +class endpoint; + +class client { + +public: + explicit client(const std::string& protocol); + std::optional + lookup(const std::string& address) noexcept; + std::string + self_address() const noexcept; + +private: + std::shared_ptr m_engine; +}; + +} // namespace scord::network + +#endif // SCORD_CLIENT_HPP diff --git a/src/common/net/endpoint.cpp b/src/common/net/endpoint.cpp new file mode 100644 index 00000000..81ec5204 --- /dev/null +++ b/src/common/net/endpoint.cpp @@ -0,0 +1,40 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#include "endpoint.hpp" + +#include + +namespace scord::network { + +endpoint::endpoint(std::shared_ptr engine, + thallium::endpoint endpoint) + : m_engine(std::move(engine)), m_endpoint(std::move(endpoint)) {} + +std::string +endpoint::address() const { + return m_endpoint; +} + +} // namespace scord::network diff --git a/src/common/net/endpoint.hpp b/src/common/net/endpoint.hpp new file mode 100644 index 00000000..4f0dee24 --- /dev/null +++ b/src/common/net/endpoint.hpp @@ -0,0 +1,82 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_ENDPOINT_HPP +#define SCORD_ENDPOINT_HPP + +#include +#include +#include + +namespace scord::network { + +class endpoint { + +public: + endpoint(std::shared_ptr engine, + thallium::endpoint endpoint); + + std::string + address() const; + + template + auto + call(const std::string& rpc_name, Args&&... args) const { + + // deduce the return type of the expression in the try-block below so + // that we know the type to return within std::optional + using rpc_function_type = + decltype(m_engine->define(std::declval())); + using rpc_return_type = decltype(std::declval().on( + m_endpoint)(std::forward(args)...)); + using return_type = std::optional; + + try { + const auto& rpc = m_engine->define(rpc_name); + const auto& rv = rpc.on(m_endpoint)(std::forward(args)...); + return return_type{rv}; + } catch(const std::exception& ex) { + LOGGER_ERROR("endpoint::call() failed: {}", ex.what()); + return return_type{}; + } + } + + auto + endp() const { + return m_endpoint; + } + + auto + engine() const { + return m_engine; + } + +private: + std::shared_ptr m_engine; + thallium::endpoint m_endpoint; +}; + +} // namespace scord::network + +#endif // SCORD_ENDPOINT_HPP diff --git a/src/common/net/server.cpp b/src/common/net/server.cpp index 99293371..533ba698 100644 --- a/src/common/net/server.cpp +++ b/src/common/net/server.cpp @@ -40,12 +40,11 @@ #include #include #include -#include "engine.hpp" #include "server.hpp" -namespace scord { +using namespace std::literals; -server::server() : m_settings(std::make_unique()) {} +namespace scord::network { server::~server() = default; @@ -143,7 +142,7 @@ server::daemonize() { */ int pfd; - if((pfd = ::open(m_settings->pidfile().c_str(), O_RDWR | O_CREAT, 0640)) == + if((pfd = ::open(m_settings.pidfile().c_str(), O_RDWR | O_CREAT, 0640)) == -1) { LOGGER_ERRNO("Failed to create daemon lock file"); exit(EXIT_FAILURE); @@ -178,14 +177,9 @@ server::daemonize() { return 0; } -void -server::configure(const config::settings& settings) { - m_settings = std::make_unique(settings); -} - config::settings server::get_configuration() const { - return *m_settings; + return m_settings; } void @@ -215,16 +209,15 @@ server::signal_handler(int signum) { void server::init_logger() { - if(m_settings->use_console()) { - logger::create_global_logger(m_settings->progname(), "console color"); + if(m_settings.use_console()) { + logger::create_global_logger(m_settings.progname(), "console color"); return; - ; } - if(m_settings->use_syslog()) { - logger::create_global_logger(m_settings->progname(), "syslog"); + if(m_settings.use_syslog()) { + logger::create_global_logger(m_settings.progname(), "syslog"); - if(!m_settings->daemonize()) { + if(!m_settings.daemonize()) { fmt::print(stderr, "PSA: Output sent to syslog while in " "non-daemon mode\n"); } @@ -232,13 +225,13 @@ server::init_logger() { return; } - if(!m_settings->log_file().empty()) { - logger::create_global_logger(m_settings->progname(), "file", - m_settings->log_file()); + if(!m_settings.log_file().empty()) { + 
logger::create_global_logger(m_settings.progname(), "file", + m_settings.log_file()); return; } - logger::create_global_logger(m_settings->progname(), "console color"); + logger::create_global_logger(m_settings.progname(), "console color"); } void @@ -246,58 +239,22 @@ server::install_signal_handlers() { LOGGER_INFO(" * Installing signal handlers..."); - m_signal_listener = std::make_unique(); - - m_signal_listener->set_handler(std::bind(&server::signal_handler, // NOLINT - this, std::placeholders::_1), - SIGHUP, SIGTERM, SIGINT); + m_signal_listener.set_handler(std::bind(&server::signal_handler, // NOLINT + this, std::placeholders::_1), + SIGHUP, SIGTERM, SIGINT); // This call does not block. Instead, it starts an internal std::thread // responsible for processing incoming signals - m_signal_listener->run(); -} - -void -server::install_rpc_handlers() { - - LOGGER_INFO(" * Creating RPC listener..."); - - // create (but not start) the API listener - // and register handlers for each request type - m_network_engine = std::make_unique( - m_settings->transport_protocol(), m_settings->bind_address(), - m_settings->remote_port()); - - if(m_rpc_registration_callback) { - m_rpc_registration_callback(m_network_engine); - } + m_signal_listener.run(); } void -server::check_configuration() { - - // // check that the staging directory exists and that we can write to it - // if(!fs::exists(m_settings->staging_directory())) { - // LOGGER_ERROR("Staging directory {} does not exist", - // m_settings->staging_directory()); - // teardown_and_exit(); - // } - - // auto s = fs::status(m_settings->staging_directory()); - // - // auto expected_perms = fs::perms::owner_read | fs::perms::owner_write; - // - // if((s.permissions() & expected_perms) != expected_perms) { - // LOGGER_ERROR("Unable to read from/write to staging directory {}", - // m_settings->staging_directory()); - // teardown_and_exit(); - // } -} +server::check_configuration() {} void server::print_greeting() { const auto greeting = fmt::format("Starting {} daemon (pid {})", - m_settings->progname(), getpid()); + m_settings.progname(), getpid()); LOGGER_INFO("{:=>{}}", "", greeting.size()); LOGGER_INFO(greeting); @@ -309,28 +266,26 @@ server::print_configuration() { LOGGER_INFO(""); LOGGER_INFO("[[ Configuration ]]"); LOGGER_INFO(" - running as daemon?: {}", - (m_settings->daemonize() ? "yes" : "no")); + (m_settings.daemonize() ? 
"yes" : "no")); - if(!m_settings->log_file().empty()) { - LOGGER_INFO(" - log file: {}", m_settings->log_file()); + if(!m_settings.log_file().empty()) { + LOGGER_INFO(" - log file: {}", m_settings.log_file()); LOGGER_INFO(" - log file maximum size: {}", - m_settings->log_file_max_size()); + m_settings.log_file_max_size()); } else { LOGGER_INFO(" - log file: none"); } - LOGGER_INFO(" - pidfile: {}", m_settings->pidfile()); - // LOGGER_INFO(" - staging directory: {}", - // m_settings->staging_directory()); - LOGGER_INFO(" - port for remote requests: {}", m_settings->remote_port()); - LOGGER_INFO(" - workers: {}", m_settings->workers_in_pool()); + LOGGER_INFO(" - pidfile: {}", m_settings.pidfile()); + LOGGER_INFO(" - port for remote requests: {}", m_settings.remote_port()); + LOGGER_INFO(" - workers: {}", m_settings.workers_in_pool()); LOGGER_INFO(""); } void server::print_farewell() { const auto farewell = fmt::format("Stopping {} daemon (pid {})", - m_settings->progname(), getpid()); + m_settings.progname(), getpid()); LOGGER_INFO("{:=>{}}", "", farewell.size()); LOGGER_INFO(farewell); @@ -354,7 +309,7 @@ server::run() { #endif // daemonize if needed - if(m_settings->daemonize() && daemonize() != 0) { + if(m_settings.daemonize() && daemonize() != 0) { /* parent clean ups and exits, child continues */ teardown(); return EXIT_SUCCESS; @@ -367,14 +322,13 @@ server::run() { LOGGER_INFO("[[ Starting up ]]"); install_signal_handlers(); - install_rpc_handlers(); LOGGER_INFO(""); LOGGER_INFO("[[ Start up successful, awaiting requests... ]]"); // N.B. This call blocks here, which means that everything after it // will only run when a shutdown command is received - m_network_engine->listen(); + m_network_engine.wait_for_finalize(); print_farewell(); teardown(); @@ -388,22 +342,15 @@ server::run() { void server::teardown() { - if(m_signal_listener) { - LOGGER_INFO("* Stopping signal listener..."); - m_signal_listener->stop(); - } - - if(m_settings) { - std::error_code ec; + LOGGER_INFO("* Stopping signal listener..."); + m_signal_listener.stop(); - fs::remove(m_settings->pidfile(), ec); - - if(ec) { - LOGGER_ERROR("Failed to remove pidfile {}: {}", - m_settings->pidfile(), ec.message()); - } + std::error_code ec; + fs::remove(m_settings.pidfile(), ec); - m_settings.reset(); + if(ec) { + LOGGER_ERROR("Failed to remove pidfile {}: {}", m_settings.pidfile(), + ec.message()); } } @@ -415,7 +362,7 @@ server::teardown_and_exit() { void server::shutdown() { - m_network_engine->stop(); + m_network_engine.finalize(); } -} // namespace scord +} // namespace scord::network diff --git a/src/common/net/server.hpp b/src/common/net/server.hpp index 930cc847..05f6d708 100644 --- a/src/common/net/server.hpp +++ b/src/common/net/server.hpp @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright 2021, Barcelona Supercomputing Center (BSC), Spain + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain * * This software was partially supported by the EuroHPC-funded project ADMIRE * (Project ID: 956748, https://www.admire-eurohpc.eu). 
@@ -25,36 +25,38 @@ #ifndef SCORD_SERVER_HPP #define SCORD_SERVER_HPP -#include -#include -#include "engine.hpp" +#include +#include +#include +#include +#include -namespace scord { +namespace scord::network { -namespace config { -struct settings; -} // namespace config - -namespace utils { -struct signal_listener; -} // namespace utils +using request = thallium::request; class server { public: - server(); - ~server(); - void - configure(const config::settings& settings); + template + explicit server(config::settings cfg, Handlers&&... handlers) + : m_settings(std::move(cfg)) { - template - void - configure(const config::settings& settings, - Callback rpc_registration_callback) { - configure(settings); - m_rpc_registration_callback = rpc_registration_callback; + using namespace std::literals; + + const std::string thallim_address = + m_settings.transport_protocol() + "://"s + + m_settings.bind_address() + ":"s + + std::to_string(m_settings.remote_port()); + + m_network_engine = + thallium::engine(thallim_address, THALLIUM_SERVER_MODE); + + (set_handler(std::forward(handlers)), ...); } + ~server(); + config::settings get_configuration() const; int @@ -66,16 +68,10 @@ public: void teardown_and_exit(); - template void - install_rpc_handlers(Callable fun) { - - install_rpc_handlers(); - - // FIXME: improve network_engine so that we don't need to rely on - // calling a lambda here to register RPCs - fun(m_network_engine); + set_handler(const std::string& name, Callable&& handler) { + m_network_engine.define(name, handler); } private: @@ -88,8 +84,7 @@ private: init_logger(); void install_signal_handlers(); - void - install_rpc_handlers(); + void check_configuration(); void @@ -100,14 +95,11 @@ private: print_farewell(); private: - std::unique_ptr m_settings; - std::unique_ptr m_network_engine; - std::unique_ptr m_signal_listener; - std::function&)> - m_rpc_registration_callback; + scord::config::settings m_settings; + thallium::engine m_network_engine; + scord::utils::signal_listener m_signal_listener; }; - -} // namespace scord +} // namespace scord::network #endif // SCORD_SERVER_HPP diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 69263c2a..e3817f51 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -32,7 +32,8 @@ set_target_properties(adm_iosched PROPERTIES PUBLIC_HEADER "admire.h;admire.hpp" target_include_directories(adm_iosched PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(adm_iosched PRIVATE common::network::engine +target_link_libraries(adm_iosched PRIVATE + common::network::rpc_client common::network::rpc_types PUBLIC tl::expected common::api::types) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index bad2ab03..8017e7e0 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -23,7 +23,9 @@ *****************************************************************************/ #include -#include +#include +#include +#include #include #include #include diff --git a/src/scord-ctl/scord-ctl.cpp b/src/scord-ctl/scord-ctl.cpp index 7a930271..375efea5 100644 --- a/src/scord-ctl/scord-ctl.cpp +++ b/src/scord-ctl/scord-ctl.cpp @@ -176,7 +176,9 @@ main(int argc, char* argv[]) { } try { - scord::server daemon; + scord::network::server daemon(cfg); + +#if 0 const auto rpc_registration_cb = [](auto&& ctx) { LOGGER_INFO(" * Registering RPCs handlers..."); @@ -186,6 +188,7 @@ main(int argc, char* argv[]) { }; daemon.configure(cfg, rpc_registration_cb); +#endif return daemon.run(); } catch(const std::exception& ex) { 
fmt::print(stderr, diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 77b09e57..f162c788 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -42,6 +42,7 @@ namespace fs = std::filesystem; namespace bpo = boost::program_options; +using namespace std::literals; void print_version(const std::string& progname) { @@ -178,8 +179,9 @@ main(int argc, char* argv[]) { } try { - scord::server daemon; + scord::network::server daemon(cfg); +#if 0 const auto rpc_registration_cb = [](auto&& ctx) { LOGGER_INFO(" * Registering RPCs handlers..."); @@ -327,8 +329,8 @@ main(int argc, char* argv[]) { // TODO: add internal RPCs for communication with scord-ctl }; +#endif - daemon.configure(cfg, rpc_registration_cb); return daemon.run(); } catch(const std::exception& ex) { fmt::print(stderr, -- GitLab From 8afe4ac1b047e4bdcacec84f3a688f739eec2946 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 6 Feb 2023 09:59:56 +0100 Subject: [PATCH 03/23] Disable old Margo code --- src/lib/admire.cpp | 30 ++++++++++ src/lib/detail/impl.cpp | 128 +++++++++++++++++++++++++++++++++++----- 2 files changed, 144 insertions(+), 14 deletions(-) diff --git a/src/lib/admire.cpp b/src/lib/admire.cpp index 84f54de7..7a94f2bc 100644 --- a/src/lib/admire.cpp +++ b/src/lib/admire.cpp @@ -68,6 +68,8 @@ init_logger() { } } + +#if 0 void rpc_registration_cb(scord::network::rpc_client* client) { @@ -192,6 +194,7 @@ rpc_registration_cb(scord::network::rpc_client* client) { REGISTER_RPC(client, "ADM_get_statistics", ADM_get_statistics_in_t, ADM_get_statistics_out_t, NULL, true); } +#endif } // namespace @@ -359,6 +362,7 @@ set_dataset_information(const server& srv, ADM_job_t job, ADM_dataset_t target, (void) target; (void) info; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -378,6 +382,7 @@ set_dataset_information(const server& srv, ADM_job_t job, ADM_dataset_t target, } LOGGER_INFO("ADM_set_dataset_information() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -389,6 +394,7 @@ set_io_resources(const server& srv, ADM_job_t job, ADM_adhoc_storage_t tier, (void) tier; (void) resources; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -408,6 +414,7 @@ set_io_resources(const server& srv, ADM_job_t job, ADM_adhoc_storage_t tier, } LOGGER_INFO("ADM_set_io_resources() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -419,6 +426,7 @@ get_transfer_priority(const server& srv, ADM_job_t job, ADM_transfer_t transfer, (void) transfer; (void) priority; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -436,6 +444,7 @@ get_transfer_priority(const server& srv, ADM_job_t job, ADM_transfer_t transfer, } LOGGER_INFO("ADM_get_transfer_priority() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -447,6 +456,7 @@ set_transfer_priority(const server& srv, ADM_job_t job, ADM_transfer_t transfer, (void) transfer; (void) incr; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -464,15 +474,18 @@ set_transfer_priority(const server& srv, ADM_job_t job, ADM_transfer_t transfer, } LOGGER_INFO("ADM_set_transfer_priority() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } ADM_return_t cancel_transfer(const server& srv, ADM_job_t job, ADM_transfer_t transfer) { + (void) srv; (void) job; (void) transfer; +#if 0 
scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -491,6 +504,7 @@ cancel_transfer(const server& srv, ADM_job_t job, ADM_transfer_t transfer) { } LOGGER_INFO("ADM_cancel_transfer() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -501,6 +515,7 @@ get_pending_transfers(const server& srv, ADM_job_t job, (void) job; (void) pending_transfers; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -519,6 +534,7 @@ get_pending_transfers(const server& srv, ADM_job_t job, } LOGGER_INFO("ADM_get_pending_transfers() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -530,6 +546,7 @@ set_qos_constraints(const server& srv, ADM_job_t job, ADM_qos_entity_t entity, (void) entity; (void) limit; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -552,6 +569,7 @@ set_qos_constraints(const server& srv, ADM_job_t job, ADM_qos_entity_t entity, } LOGGER_INFO("ADM_set_qos_constraints() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -563,6 +581,7 @@ get_qos_constraints(const server& srv, ADM_job_t job, ADM_qos_entity_t entity, (void) entity; (void) limits; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -583,6 +602,7 @@ get_qos_constraints(const server& srv, ADM_job_t job, ADM_qos_entity_t entity, } LOGGER_INFO("ADM_get_qos_constraints() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -595,6 +615,7 @@ define_data_operation(const server& srv, ADM_job_t job, const char* path, (void) op; (void) args; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -617,6 +638,7 @@ define_data_operation(const server& srv, ADM_job_t job, const char* path, } LOGGER_INFO("ADM_define_data_operation() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -630,6 +652,7 @@ connect_data_operation(const server& srv, ADM_job_t job, ADM_dataset_t input, (void) should_stream; (void) args; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -651,6 +674,7 @@ connect_data_operation(const server& srv, ADM_job_t job, ADM_dataset_t input, } LOGGER_INFO("ADM_connect_data_operation() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -663,6 +687,7 @@ finalize_data_operation(const server& srv, ADM_job_t job, (void) op; (void) status; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -681,6 +706,7 @@ finalize_data_operation(const server& srv, ADM_job_t job, } LOGGER_INFO("ADM_finalize_data_operation() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -696,6 +722,7 @@ link_transfer_to_data_operation(const server& srv, ADM_job_t job, (void) should_stream; (void) args; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -717,6 +744,7 @@ link_transfer_to_data_operation(const server& srv, ADM_job_t job, } LOGGER_INFO("ADM_link_transfer_to_data_operation() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } @@ -726,6 +754,7 @@ get_statistics(const server& srv, ADM_job_t job, ADM_job_stats_t** stats) { (void) job; (void) stats; +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), 
rpc_registration_cb}; auto endp = rpc_client.lookup(srv.address()); @@ -744,6 +773,7 @@ get_statistics(const server& srv, ADM_job_t job, ADM_job_stats_t** stats) { } LOGGER_INFO("ADM_get_statistics() = {}", ADM_SUCCESS); +#endif return ADM_SUCCESS; } diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 8017e7e0..2057c653 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -33,6 +33,7 @@ using namespace std::literals; +#if 0 void rpc_registration_cb(scord::network::rpc_client* client) { @@ -157,6 +158,7 @@ rpc_registration_cb(scord::network::rpc_client* client) { REGISTER_RPC(client, "ADM_get_statistics", ADM_get_statistics_in_t, ADM_get_statistics_out_t, NULL, true); } +#endif namespace api { @@ -175,33 +177,48 @@ namespace admire::detail { admire::error_code ping(const server& srv) { - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); + if(const auto lookup_rv = rpc_client.lookup(srv.address()); lookup_rv) { + const auto& endp = lookup_rv.value(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address())); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address().value_or("unknown"))); - ADM_ping_out_t out; + if(const auto call_rv = endp.call("ADM_"s + __FUNCTION__); call_rv) { - const auto rpc = endp.call("ADM_ping", nullptr, &out); + const scord::network::generic_response resp{call_rv.value()}; - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code{out.retval}, - out.op_id); - return admire::error_code::success; + LOGGER_INFO("rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); + + return admire::error_code::success; + } + } + + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } tl::expected register_job(const server& srv, const job::resources& job_resources, const job_requirements& reqs, admire::slurm_job_id slurm_id) { + (void) srv; + (void) job_resources; + (void) reqs; + (void) slurm_id; + + return tl::make_unexpected(admire::error_code::snafu); + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -240,12 +257,20 @@ register_job(const server& srv, const job::resources& job_resources, out.op_id); return job; +#endif } admire::error_code update_job(const server& srv, const job& job, const job::resources& job_resources) { + (void) srv; + (void) job; + (void) job_resources; + + return admire::error_code::snafu; + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -278,11 +303,19 @@ update_job(const server& srv, const job& job, std::quoted(rpc.origin()), admire::error_code::success, out.op_id); return admire::error_code::success; +#endif } admire::error_code remove_job(const server& srv, const job& job) { + (void) srv; + (void) job; + + return admire::error_code::snafu; + +#if 0 + scord::network::rpc_client rpc_client{srv.protocol(), 
rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -314,6 +347,7 @@ remove_job(const server& srv, const job& job) { std::quoted(rpc.origin()), admire::error_code::success, out.op_id); return admire::error_code::success; +#endif } tl::expected @@ -321,6 +355,14 @@ register_adhoc_storage(const server& srv, const std::string& name, enum adhoc_storage::type type, const adhoc_storage::ctx& ctx) { + (void) srv; + (void) name; + (void) type; + (void) ctx; + + return tl::make_unexpected(admire::error_code::snafu); + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -357,11 +399,18 @@ register_adhoc_storage(const server& srv, const std::string& name, admire::error_code::success, out.id, out.op_id); return rpc_adhoc_storage; +#endif } admire::error_code deploy_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { + (void) srv; + (void) adhoc_storage; + + return admire::error_code::snafu; + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -392,6 +441,7 @@ deploy_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { admire::error_code::success, out.op_id); return admire::error_code::success; +#endif } tl::expected @@ -401,6 +451,16 @@ transfer_datasets(const server& srv, const job& job, const std::vector& limits, transfer::mapping mapping) { + (void) srv; + (void) job; + (void) sources; + (void) targets; + (void) limits; + (void) mapping; + + return tl::make_unexpected(admire::error_code::snafu); + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -442,6 +502,7 @@ transfer_datasets(const server& srv, const job& job, std::quoted(rpc.origin()), admire::error_code::success, tx, out.op_id); return tx; +#endif } admire::error_code @@ -449,6 +510,13 @@ update_adhoc_storage(const server& srv, const adhoc_storage::ctx& adhoc_storage_ctx, const adhoc_storage& adhoc_storage) { + (void) srv; + (void) adhoc_storage_ctx; + (void) adhoc_storage; + + return admire::error_code::snafu; + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -481,11 +549,18 @@ update_adhoc_storage(const server& srv, out.op_id); return admire::error_code::success; +#endif } admire::error_code remove_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { + (void) srv; + (void) adhoc_storage; + + return admire::error_code::snafu; + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -515,12 +590,21 @@ remove_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { std::quoted(rpc.origin()), admire::error_code::success, out.op_id); return admire::error_code::success; +#endif } tl::expected register_pfs_storage(const server& srv, const std::string& name, enum pfs_storage::type type, const pfs_storage::ctx& ctx) { + (void) srv; + (void) name; + (void) type; + (void) ctx; + + return tl::make_unexpected(admire::error_code::snafu); + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -557,12 +641,20 @@ register_pfs_storage(const server& srv, const std::string& name, admire::error_code::success, out.id, out.op_id); return 
rpc_pfs_storage; +#endif } admire::error_code update_pfs_storage(const server& srv, const pfs_storage& pfs_storage, const admire::pfs_storage::ctx& pfs_storage_ctx) { + (void) srv; + (void) pfs_storage; + (void) pfs_storage_ctx; + + return admire::error_code::snafu; + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -595,11 +687,18 @@ update_pfs_storage(const server& srv, const pfs_storage& pfs_storage, out.op_id); return admire::error_code::success; +#endif } admire::error_code remove_pfs_storage(const server& srv, const pfs_storage& pfs_storage) { + (void) srv; + (void) pfs_storage; + + return admire::error_code::snafu; + +#if 0 scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; const auto rpc_id = ::api::remote_procedure::new_id(); @@ -630,6 +729,7 @@ remove_pfs_storage(const server& srv, const pfs_storage& pfs_storage) { out.op_id); return admire::error_code::success; +#endif } } // namespace admire::detail -- GitLab From 2b617b51bb4fca34036c23b5a50a6ca3e7339b5f Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 10:30:08 +0100 Subject: [PATCH 04/23] logger: Add LOGGER_EVAL(expr, LEVEL1, LEVEL2) --- src/common/logger/logger.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/common/logger/logger.hpp b/src/common/logger/logger.hpp index 1dfa3d23..30503dfb 100644 --- a/src/common/logger/logger.hpp +++ b/src/common/logger/logger.hpp @@ -350,4 +350,14 @@ private: } \ } while(0); + +#define LOGGER_EVAL(expr, L1, L2, ...) \ + do { \ + if(expr) { \ + LOGGER_##L1(__VA_ARGS__); \ + } else { \ + LOGGER_##L2(__VA_ARGS__); \ + } \ + } while(0); + #endif /* SCORD_LOGGER_HPP */ -- GitLab From 178b2cdb3e4fbb18c97286f3b36ea07e60e99e72 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Sat, 4 Feb 2023 11:06:19 +0100 Subject: [PATCH 05/23] scord: Rewrite `ADM_ping` RPC --- src/common/api/admire_types.hpp | 6 +++ src/common/net/CMakeLists.txt | 2 +- src/common/net/request.hpp | 74 +++++++++++++++++++++++++++++++++ src/lib/admire.cpp | 1 - src/lib/detail/impl.cpp | 11 +++-- src/scord/rpc_handlers.cpp | 34 +++++++-------- src/scord/rpc_handlers.hpp | 9 ++-- src/scord/scord.cpp | 2 + 8 files changed, 112 insertions(+), 27 deletions(-) create mode 100644 src/common/net/request.hpp diff --git a/src/common/api/admire_types.hpp b/src/common/api/admire_types.hpp index 49eed8ec..ee3599c8 100644 --- a/src/common/api/admire_types.hpp +++ b/src/common/api/admire_types.hpp @@ -92,6 +92,12 @@ struct error_code { std::string_view message() const; + template + void + serialize(Archive&& ar) { + ar& m_value; + } + private: ADM_return_t m_value; }; diff --git a/src/common/net/CMakeLists.txt b/src/common/net/CMakeLists.txt index 3e987198..1b7f449d 100644 --- a/src/common/net/CMakeLists.txt +++ b/src/common/net/CMakeLists.txt @@ -35,7 +35,7 @@ set_property(TARGET _rpc_client PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(_rpc_server STATIC) target_sources( _rpc_server - INTERFACE server.hpp + INTERFACE server.hpp request.hpp PRIVATE server.cpp ) diff --git a/src/common/net/request.hpp b/src/common/net/request.hpp new file mode 100644 index 00000000..32482543 --- /dev/null +++ b/src/common/net/request.hpp @@ -0,0 +1,74 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 
956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_NET_REQUEST_HPP +#define SCORD_NET_REQUEST_HPP + +#include +#include "admire_types.hpp" + +namespace scord::network { + +using request = thallium::request; + +template +inline std::string +get_address(Request&& req) { + return std::forward(req).get_endpoint(); +} + +class generic_response { + +public: + constexpr generic_response() noexcept = default; + constexpr generic_response(std::uint64_t op_id, + admire::error_code ec) noexcept + : m_op_id(op_id), m_error_code(ec) {} + + constexpr std::uint64_t + op_id() const noexcept { + return m_op_id; + } + + constexpr admire::error_code + error_code() const noexcept { + return m_error_code; + } + + template + constexpr void + serialize(Archive&& ar) { + ar& m_op_id; + ar& m_error_code; + } + +private: + std::uint64_t m_op_id; + admire::error_code m_error_code; +}; + + +} // namespace scord::network + +#endif // SCORD_NET_REQUEST_HPP diff --git a/src/lib/admire.cpp b/src/lib/admire.cpp index 7a94f2bc..e94384b9 100644 --- a/src/lib/admire.cpp +++ b/src/lib/admire.cpp @@ -23,7 +23,6 @@ *****************************************************************************/ #include -#include #include #include #include diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 2057c653..b38d5400 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -181,7 +181,8 @@ ping(const server& srv) { const auto rpc_id = ::api::remote_procedure::new_id(); - if(const auto lookup_rv = rpc_client.lookup(srv.address()); lookup_rv) { + if(const auto lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { const auto& endp = lookup_rv.value(); LOGGER_INFO("rpc id: {} name: {} from: {} => " @@ -189,17 +190,19 @@ ping(const server& srv) { rpc_id, std::quoted("ADM_"s + __FUNCTION__), std::quoted(rpc_client.self_address().value_or("unknown"))); - if(const auto call_rv = endp.call("ADM_"s + __FUNCTION__); call_rv) { + if(const auto call_rv = endp.call("ADM_"s + __FUNCTION__); + call_rv.has_value()) { const scord::network::generic_response resp{call_rv.value()}; - LOGGER_INFO("rpc id: {} name: {} from: {} <= " + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " "body: {{retval: {}}} [op_id: {}]", rpc_id, std::quoted("ADM_"s + __FUNCTION__), std::quoted(endp.address()), resp.error_code(), resp.op_id()); - return admire::error_code::success; + return resp.error_code(); } } diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 190af867..4c710432 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include "rpc_handlers.hpp" #include "job_manager.hpp" #include "adhoc_storage_manager.hpp" @@ -36,6 +37,8 
@@ #include #include +using namespace std::literals; + struct remote_procedure { static std::uint64_t new_id() { @@ -44,38 +47,33 @@ struct remote_procedure { } }; -static void -ADM_ping(hg_handle_t h) { - - using scord::network::utils::get_address; +namespace scord::network::handlers { - [[maybe_unused]] hg_return_t ret; +void +ping(const scord::network::request& req) { - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); + using scord::network::generic_response; + using scord::network::get_address; - const auto id = remote_procedure::new_id(); + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); LOGGER_INFO("rpc id: {} name: {} from: {} => " "body: {{}}", - id, std::quoted(__FUNCTION__), std::quoted(get_address(h))); + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req))); - ADM_ping_out_t out; - out.op_id = id; - out.retval = ADM_SUCCESS; + const auto resp = generic_response{rpc_id, admire::error_code::success}; LOGGER_INFO("rpc id: {} name: {} to: {} <= " "body: {{retval: {}}}", - id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), admire::error_code::success); - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); + req.respond(resp); } -DEFINE_MARGO_RPC_HANDLER(ADM_ping); +} // namespace scord::network::handlers + static void ADM_register_job(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 85dbc1d7..d4204e5f 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -26,6 +26,12 @@ #ifndef SCORD_RPC_HANDLERS_HPP #define SCORD_RPC_HANDLERS_HPP +namespace scord::network::handlers { + +void ping(const scord::network::request& req); + +} + #include #ifdef __cplusplus @@ -35,9 +41,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_ping -DECLARE_MARGO_RPC_HANDLER(ADM_ping); - /// ADM_register_job DECLARE_MARGO_RPC_HANDLER(ADM_register_job); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index f162c788..342c57ea 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -181,6 +181,8 @@ main(int argc, char* argv[]) { try { scord::network::server daemon(cfg); + daemon.set_handler("ADM_ping"s, scord::network::handlers::ping); + #if 0 const auto rpc_registration_cb = [](auto&& ctx) { LOGGER_INFO(" * Registering RPCs handlers..."); -- GitLab From 222406846ff3d9daf2f9d2611cd5ee8ab7f1c8d7 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 6 Feb 2023 16:25:04 +0100 Subject: [PATCH 06/23] scord: Rewrite `ADM_register_adhoc_storage` RPC --- src/common/api/admire_types.hpp | 26 ++++++++ src/common/api/types.cpp | 42 +++++++++++++ src/common/net/CMakeLists.txt | 4 +- src/common/net/request.hpp | 31 ++++++++++ src/common/net/serialization.hpp | 45 ++++++++++++++ src/lib/detail/impl.cpp | 64 +++++++++---------- src/scord/rpc_handlers.cpp | 103 +++++++++++++------------------ src/scord/rpc_handlers.hpp | 7 ++- src/scord/scord.cpp | 3 + 9 files changed, 223 insertions(+), 102 deletions(-) create mode 100644 src/common/net/serialization.hpp diff --git a/src/common/api/admire_types.hpp b/src/common/api/admire_types.hpp index ee3599c8..ce7ed5ad 100644 --- a/src/common/api/admire_types.hpp +++ b/src/common/api/admire_types.hpp @@ -141,6 +141,7 @@ private: struct node { + node(); explicit node(std::string hostname); explicit node(const ADM_node_t& srv); 
node(const node&) noexcept; @@ -154,6 +155,12 @@ struct node { std::string hostname() const; + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; @@ -339,18 +346,27 @@ struct adhoc_storage { }; struct resources { + resources() = default; explicit resources(std::vector nodes); explicit resources(ADM_adhoc_resources_t res); std::vector nodes() const; + template + void + serialize(Archive&& ar) { + ar& m_nodes; + } + private: std::vector m_nodes; }; struct ctx { + ctx() = default; + ctx(execution_mode exec_mode, access_type access_type, adhoc_storage::resources resources, std::uint32_t walltime, bool should_flush); @@ -368,6 +384,16 @@ struct adhoc_storage { bool should_flush() const; + template + void + serialize(Archive&& ar) { + ar& m_exec_mode; + ar& m_access_type; + ar& m_resources; + ar& m_walltime; + ar& m_should_flush; + } + private: execution_mode m_exec_mode; enum access_type m_access_type; diff --git a/src/common/api/types.cpp b/src/common/api/types.cpp index 98074ca6..982169d9 100644 --- a/src/common/api/types.cpp +++ b/src/common/api/types.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -1117,6 +1118,7 @@ server::address() const { class node::impl { public: + impl() = default; explicit impl(std::string hostname) : m_hostname(std::move(hostname)) {} std::string @@ -1124,10 +1126,24 @@ public: return m_hostname; } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_hostname)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_hostname)); + } + private: std::string m_hostname; }; +node::node() = default; + node::node(std::string hostname) : m_pimpl(std::make_unique(std::move(hostname))) {} @@ -1154,6 +1170,32 @@ node::hostname() const { return m_pimpl->hostname(); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +node::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +node::impl::save( + scord::network::serialization::output_archive&) const; + +template void +node::impl::load( + scord::network::serialization::input_archive&); + +template void +node::serialize( + scord::network::serialization::output_archive&); + +template void +node::serialize( + scord::network::serialization::input_archive&); + class job::impl { public: diff --git a/src/common/net/CMakeLists.txt b/src/common/net/CMakeLists.txt index 1b7f449d..c05d2713 100644 --- a/src/common/net/CMakeLists.txt +++ b/src/common/net/CMakeLists.txt @@ -25,7 +25,7 @@ add_library(_rpc_client STATIC) target_sources( _rpc_client - INTERFACE endpoint.hpp client.hpp request.hpp + INTERFACE endpoint.hpp client.hpp request.hpp serialization.hpp PRIVATE endpoint.cpp client.cpp ) @@ -35,7 +35,7 @@ set_property(TARGET _rpc_client PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(_rpc_server STATIC) target_sources( _rpc_server - INTERFACE server.hpp request.hpp + INTERFACE server.hpp request.hpp serialization.hpp PRIVATE server.cpp ) diff --git a/src/common/net/request.hpp b/src/common/net/request.hpp index 32482543..5c36cb88 100644 --- a/src/common/net/request.hpp +++ b/src/common/net/request.hpp @@ -68,6 +68,37 @@ private: admire::error_code m_error_code; }; +template +class response_with_value : 
public generic_response { + +public: + constexpr response_with_value() noexcept = default; + + constexpr response_with_value(std::uint64_t op_id, admire::error_code ec, + std::optional value) noexcept + : generic_response(op_id, ec), m_value(std::move(value)) {} + + constexpr auto + value() const noexcept { + return m_value.value(); + } + + constexpr auto + has_value() const noexcept { + return m_value.has_value(); + } + + template + constexpr void + serialize(Archive&& ar) { + ar(cereal::base_class(this), m_value); + } + +private: + std::optional m_value; +}; + +using response_with_id = response_with_value; } // namespace scord::network diff --git a/src/common/net/serialization.hpp b/src/common/net/serialization.hpp new file mode 100644 index 00000000..c8c731d0 --- /dev/null +++ b/src/common/net/serialization.hpp @@ -0,0 +1,45 @@ +/****************************************************************************** + * Copyright 2021-2023, Barcelona Supercomputing Center (BSC), Spain + * + * This software was partially supported by the EuroHPC-funded project ADMIRE + * (Project ID: 956748, https://www.admire-eurohpc.eu). + * + * This file is part of scord. + * + * scord is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * scord is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with scord. If not, see . 
+ * + * SPDX-License-Identifier: GPL-3.0-or-later + *****************************************************************************/ + +#ifndef SCORD_SERIALIZATION_HPP +#define SCORD_SERIALIZATION_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace scord::network::serialization { + +#define SCORD_SERIALIZATION_NVP CEREAL_NVP + +using input_archive = thallium::proc_input_archive<>; +using output_archive = thallium::proc_output_archive<>; + +} // namespace scord::network::serialization + +#endif // SCORD_SERIALIZATION_HPP diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index b38d5400..0faa60e4 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -188,7 +189,7 @@ ping(const server& srv) { LOGGER_INFO("rpc id: {} name: {} from: {} => " "body: {{}}", rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address().value_or("unknown"))); + std::quoted(rpc_client.self_address())); if(const auto call_rv = endp.call("ADM_"s + __FUNCTION__); call_rv.has_value()) { @@ -358,51 +359,42 @@ register_adhoc_storage(const server& srv, const std::string& name, enum adhoc_storage::type type, const adhoc_storage::ctx& ctx) { - (void) srv; - (void) name; - (void) type; - (void) ctx; - - return tl::make_unexpected(admire::error_code::snafu); - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{name: {}, type: {}, adhoc_ctx: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), name, type, ctx); + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_name = name.c_str(); - const auto rpc_type = static_cast(type); - const auto rpc_ctx = api::convert(ctx); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{name: {}, type: {}, adhoc_ctx: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), name, type, ctx); - ADM_register_adhoc_storage_in_t in{rpc_name, rpc_type, rpc_ctx.get()}; - ADM_register_adhoc_storage_out_t out; + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, name, type, ctx); + call_rv.has_value()) { - const auto rpc = endp.call("ADM_register_adhoc_storage", &in, &out); + const scord::network::response_with_id resp{call_rv.value()}; - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), rv, out.op_id); - return tl::make_unexpected(rv); - } + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}, adhoc_id: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.value(), resp.op_id()); - auto rpc_adhoc_storage = admire::adhoc_storage{type, name, out.id, ctx}; + if(const auto ec = resp.error_code(); !ec) { + return tl::make_unexpected(ec); + } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}, id: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - 
std::quoted(rpc_client.self_address()), - admire::error_code::success, out.id, out.op_id); + return admire::adhoc_storage{type, name, resp.value(), ctx}; + } + } - return rpc_adhoc_storage; -#endif + LOGGER_ERROR("rpc call failed"); + return tl::make_unexpected(admire::error_code::other); } admire::error_code diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 4c710432..a2ee31c7 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -72,6 +73,47 @@ ping(const scord::network::request& req) { req.respond(resp); } + +void +register_adhoc_storage(const request& req, const std::string& name, + enum admire::adhoc_storage::type type, + const admire::adhoc_storage::ctx& ctx) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{name: {}, type: {}, adhoc_ctx: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + name, type, ctx); + + admire::error_code ec; + std::optional adhoc_id; + auto& adhoc_manager = scord::adhoc_storage_manager::instance(); + + if(const auto am_result = adhoc_manager.create(type, name, ctx); + am_result.has_value()) { + const auto& adhoc_storage_info = am_result.value(); + adhoc_id = adhoc_storage_info->adhoc_storage().id(); + } else { + LOGGER_ERROR("rpc id: {} error_msg: \"Error creating adhoc_storage: " + "{}\"", + rpc_id, am_result.error()); + ec = am_result.error(); + } + + const auto resp = response_with_id{rpc_id, ec, adhoc_id}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}, adhoc_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec, adhoc_id); + + req.respond(resp); +} + } // namespace scord::network::handlers @@ -263,67 +305,6 @@ ADM_remove_job(hg_handle_t h) { DEFINE_MARGO_RPC_HANDLER(ADM_remove_job); -static void -ADM_register_adhoc_storage(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_register_adhoc_storage_in_t in; - ADM_register_adhoc_storage_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const std::string name(in.name); - const auto type = static_cast(in.type); - const admire::adhoc_storage::ctx ctx(in.ctx); - - const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{name: {}, type: {}, adhoc_ctx: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - name, type, ctx); - - admire::error_code ec; - std::uint64_t out_adhoc_id = 0; - auto& adhoc_manager = scord::adhoc_storage_manager::instance(); - - if(const auto am_result = adhoc_manager.create(type, name, ctx); - am_result.has_value()) { - const auto& adhoc_storage_info = am_result.value(); - out_adhoc_id = adhoc_storage_info->adhoc_storage().id(); - } else { - LOGGER_ERROR("rpc id: {} error_msg: \"Error creating adhoc_storage: " - "{}\"", - rpc_id, am_result.error()); - ec = am_result.error(); - } - - out.op_id = rpc_id; - out.retval = ec; - out.id = out_adhoc_id; - - LOGGER_INFO("rpc id: {} name: {} to: {} => " - "body: {{retval: {}, id: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec, out.id); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == 
HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_register_adhoc_storage); - static void ADM_update_adhoc_storage(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index d4204e5f..d3ac5109 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -26,9 +26,13 @@ #ifndef SCORD_RPC_HANDLERS_HPP #define SCORD_RPC_HANDLERS_HPP +#include + namespace scord::network::handlers { void ping(const scord::network::request& req); +void register_adhoc_storage(const request& req, const std::string& name, +enum admire::adhoc_storage::type type, const admire::adhoc_storage::ctx& ctx); } @@ -50,9 +54,6 @@ DECLARE_MARGO_RPC_HANDLER(ADM_update_job); /// ADM_remove_job DECLARE_MARGO_RPC_HANDLER(ADM_remove_job); -/// ADM_register_adhoc_storage -DECLARE_MARGO_RPC_HANDLER(ADM_register_adhoc_storage); - /// ADM_update_adhoc_storage DECLARE_MARGO_RPC_HANDLER(ADM_update_adhoc_storage); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 342c57ea..84418288 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -33,6 +33,7 @@ #include #include +#include #include #include #include "rpc_handlers.hpp" @@ -182,6 +183,8 @@ main(int argc, char* argv[]) { scord::network::server daemon(cfg); daemon.set_handler("ADM_ping"s, scord::network::handlers::ping); + daemon.set_handler("ADM_register_adhoc_storage"s, + scord::network::handlers::register_adhoc_storage); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From 565d98921f907372b65b7cf9d2fe6bd6427e17c1 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Mon, 6 Feb 2023 12:27:08 +0100 Subject: [PATCH 07/23] scord: Rewrite `ADM_register_job` RPC --- src/common/api/admire_types.hpp | 36 +++++++ src/common/api/types.cpp | 169 +++++++++++++++++++++++++++++++- src/lib/detail/impl.cpp | 65 ++++++------ src/lib/detail/impl.hpp | 3 +- src/scord/rpc_handlers.cpp | 123 ++++++++++------------- src/scord/rpc_handlers.hpp | 10 +- src/scord/scord.cpp | 3 +- 7 files changed, 295 insertions(+), 114 deletions(-) diff --git a/src/common/api/admire_types.hpp b/src/common/api/admire_types.hpp index ce7ed5ad..395bb5e1 100644 --- a/src/common/api/admire_types.hpp +++ b/src/common/api/admire_types.hpp @@ -32,6 +32,7 @@ #include #include #include +#include #include "admire_types.h" namespace admire { @@ -171,16 +172,24 @@ struct job_requirements; struct job { struct resources { + resources(); explicit resources(std::vector nodes); explicit resources(ADM_job_resources_t res); std::vector nodes() const; + template + void + serialize(Archive&& ar) { + ar& m_nodes; + } + private: std::vector m_nodes; }; + job(); job(job_id id, slurm_job_id slurm_id); explicit job(ADM_job_t job); job(const job&) noexcept; @@ -200,6 +209,11 @@ struct job { private: class impl; std::unique_ptr m_pimpl; + + friend class cereal::access; + template + void + serialize(Archive& ar); }; struct transfer { @@ -305,6 +319,7 @@ private: struct dataset { + dataset(); explicit dataset(std::string id); explicit dataset(ADM_dataset_t dataset); dataset(const dataset&) noexcept; @@ -318,6 +333,12 @@ struct dataset { std::string id() const; + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; @@ -402,6 +423,7 @@ struct adhoc_storage { bool m_should_flush; }; + adhoc_storage(); adhoc_storage(enum adhoc_storage::type type, std::string name, std::uint64_t id, 
execution_mode exec_mode, access_type access_type, adhoc_storage::resources res, @@ -430,6 +452,12 @@ struct adhoc_storage { void update(admire::adhoc_storage::ctx new_ctx); + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; @@ -490,6 +518,8 @@ private: struct job_requirements { + job_requirements(); + job_requirements(std::vector inputs, std::vector outputs); @@ -515,6 +545,12 @@ struct job_requirements { std::optional adhoc_storage() const; + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; diff --git a/src/common/api/types.cpp b/src/common/api/types.cpp index 982169d9..6bc9624a 100644 --- a/src/common/api/types.cpp +++ b/src/common/api/types.cpp @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include "admire_types.hpp" #include "internal_types.hpp" @@ -1199,6 +1201,7 @@ node::serialize( class job::impl { public: + impl() {} impl(job_id id, slurm_job_id slurm_job_id) : m_id(id), m_slurm_job_id(slurm_job_id) {} impl(const impl& rhs) = default; @@ -1219,10 +1222,26 @@ public: } private: + friend class cereal::access; + + template + void + load(Archive& ar) { + ar(CEREAL_NVP(m_id)); + } + + template + void + save(Archive& ar) const { + ar(CEREAL_NVP(m_id)); + } + job_id m_id; slurm_job_id m_slurm_job_id; }; +job::resources::resources() = default; + job::resources::resources(std::vector nodes) : m_nodes(std::move(nodes)) {} @@ -1240,6 +1259,8 @@ job::resources::nodes() const { return m_nodes; } +job::job() = default; + job::job(job_id id, slurm_job_id slurm_job_id) : m_pimpl(std::make_unique(id, slurm_job_id)) {} @@ -1271,6 +1292,19 @@ job::slurm_id() const { return m_pimpl->slurm_id(); } +template +inline void +job::serialize(Archive& ar) { + ar(CEREAL_NVP(m_pimpl)); +} + +template void +job::serialize>(thallium::proc_input_archive<>&); +template void +job::serialize>( + thallium::proc_output_archive<>&); + + class transfer::impl { public: @@ -1321,8 +1355,8 @@ transfer::id() const { class dataset::impl { public: + impl() = default; explicit impl(std::string id) : m_id(std::move(id)) {} - impl(const impl& rhs) = default; impl(impl&& rhs) = default; impl& @@ -1336,10 +1370,24 @@ public: return m_id; } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_id)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_id)); + } + private: std::string m_id; }; +dataset::dataset() = default; + dataset::dataset(std::string id) : m_pimpl(std::make_unique(std::move(id))) {} @@ -1366,6 +1414,32 @@ dataset::id() const { return m_pimpl->id(); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +dataset::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +dataset::impl::save( + scord::network::serialization::output_archive&) const; + +template void +dataset::impl::load( + scord::network::serialization::input_archive&); + +template void +dataset::serialize( + scord::network::serialization::output_archive&); + +template void +dataset::serialize( + scord::network::serialization::input_archive&); + adhoc_storage::resources::resources(std::vector 
nodes) : m_nodes(std::move(nodes)) {} @@ -1425,6 +1499,7 @@ adhoc_storage::ctx::should_flush() const { class adhoc_storage::impl { public: + impl() = default; explicit impl(enum adhoc_storage::type type, std::string name, std::uint64_t id, adhoc_storage::ctx ctx) : m_type(type), m_name(std::move(name)), m_id(id), @@ -1462,6 +1537,25 @@ public: m_ctx = std::move(new_ctx); } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_type)); + ar(SCORD_SERIALIZATION_NVP(m_name)); + ar(SCORD_SERIALIZATION_NVP(m_id)); + ar(SCORD_SERIALIZATION_NVP(m_ctx)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_type)); + ar(SCORD_SERIALIZATION_NVP(m_name)); + ar(SCORD_SERIALIZATION_NVP(m_id)); + ar(SCORD_SERIALIZATION_NVP(m_ctx)); + } + + private: enum type m_type; std::string m_name; @@ -1469,6 +1563,8 @@ private: adhoc_storage::ctx m_ctx; }; +adhoc_storage::adhoc_storage() = default; + adhoc_storage::adhoc_storage(enum adhoc_storage::type type, std::string name, std::uint64_t id, execution_mode exec_mode, access_type access_type, @@ -1527,6 +1623,32 @@ adhoc_storage::update(admire::adhoc_storage::ctx new_ctx) { return m_pimpl->update(std::move(new_ctx)); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +adhoc_storage::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +adhoc_storage::impl::save( + scord::network::serialization::output_archive&) const; + +template void +adhoc_storage::impl::load( + scord::network::serialization::input_archive&); + +template void +adhoc_storage::serialize( + scord::network::serialization::output_archive&); + +template void +adhoc_storage::serialize( + scord::network::serialization::input_archive&); + adhoc_storage::~adhoc_storage() = default; pfs_storage::ctx::ctx(std::filesystem::path mount_point) @@ -1645,6 +1767,7 @@ pfs_storage::update(admire::pfs_storage::ctx new_ctx) { class job_requirements::impl { public: + impl() = default; impl(std::vector inputs, std::vector outputs) : m_inputs(std::move(inputs)), m_outputs(std::move(outputs)) {} @@ -1696,6 +1819,22 @@ public: return m_adhoc_storage; } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_inputs)); + ar(SCORD_SERIALIZATION_NVP(m_outputs)); + ar(SCORD_SERIALIZATION_NVP(m_adhoc_storage)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_inputs)); + ar(SCORD_SERIALIZATION_NVP(m_outputs)); + ar(SCORD_SERIALIZATION_NVP(m_adhoc_storage)); + } + private: std::vector m_inputs; std::vector m_outputs; @@ -1703,6 +1842,8 @@ private: }; +job_requirements::job_requirements() = default; + job_requirements::job_requirements(std::vector inputs, std::vector outputs) : m_pimpl(std::make_unique(std::move(inputs), std::move(outputs))) {} @@ -1747,6 +1888,32 @@ job_requirements::adhoc_storage() const { return m_pimpl->adhoc_storage(); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +job_requirements::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +job_requirements::impl::save( + scord::network::serialization::output_archive&) const; + +template void +job_requirements::impl::load( + 
scord::network::serialization::input_archive&); + +template void +job_requirements::serialize( + scord::network::serialization::output_archive&); + +template void +job_requirements::serialize( + scord::network::serialization::input_archive&); + namespace qos { class entity::impl { diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 0faa60e4..3df941b1 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -213,55 +213,48 @@ ping(const server& srv) { tl::expected register_job(const server& srv, const job::resources& job_resources, - const job_requirements& reqs, admire::slurm_job_id slurm_id) { + const job_requirements& job_requirements, + admire::slurm_job_id slurm_id) { - (void) srv; - (void) job_resources; - (void) reqs; - (void) slurm_id; - - return tl::make_unexpected(admire::error_code::snafu); - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " + if(const auto lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); + + LOGGER_INFO( + "rpc id: {} name: {} from: {} => " "body: {{job_resources: {}, job_requirements: {}, slurm_id: " "{}}}", rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), job_resources, reqs, - slurm_id); - - auto rpc_job_resources = api::convert(job_resources); - auto rpc_reqs = api::convert(reqs); + std::quoted(rpc_client.self_address()), job_resources, + job_requirements, slurm_id); - ADM_register_job_in_t in{rpc_job_resources.get(), *rpc_reqs.get(), - slurm_id}; - ADM_register_job_out_t out; + if(const auto call_rv = endp.call("ADM_"s + __FUNCTION__, job_resources, + job_requirements, slurm_id); + call_rv.has_value()) { - const auto rpc = endp.call("ADM_register_job", &in, &out); + const scord::network::response_with_id resp{call_rv.value()}; - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return tl::make_unexpected(rv); - } + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}, job_id: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.value(), resp.op_id()); - const admire::job job = api::convert(out.job); + if(const auto ec = resp.error_code(); !ec) { + return tl::make_unexpected(resp.error_code()); + } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}, job: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, job, - out.op_id); + return admire::job{resp.value(), slurm_id}; + } + } - return job; -#endif + LOGGER_ERROR("rpc call failed"); + return tl::make_unexpected(admire::error_code::other); } admire::error_code diff --git a/src/lib/detail/impl.hpp b/src/lib/detail/impl.hpp index 52aafcd0..9743c644 100644 --- a/src/lib/detail/impl.hpp +++ b/src/lib/detail/impl.hpp @@ -36,7 +36,8 @@ ping(const server& srv); tl::expected register_job(const server& srv, const job::resources& job_resources, - const job_requirements& reqs, admire::slurm_job_id slurm_id); + const job_requirements& job_requirements, + 
admire::slurm_job_id slurm_id); admire::error_code update_job(const server& srv, const job& job, diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index a2ee31c7..05b437d2 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -73,6 +73,57 @@ ping(const scord::network::request& req) { req.respond(resp); } +void +register_job(const scord::network::request& req, + const admire::job::resources& job_resources, + const admire::job_requirements& job_requirements, + admire::slurm_job_id slurm_id) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{job_resources: {}, job_requirements: {}, slurm_id: " + "{}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + job_resources, job_requirements, slurm_id); + + admire::error_code ec; + std::optional job_id; + auto& jm = scord::job_manager::instance(); + + if(const auto jm_result = + jm.create(slurm_id, job_resources, job_requirements); + jm_result.has_value()) { + + const auto& job_info = jm_result.value(); + + // if the job requires an adhoc storage instance, inform the appropriate + // adhoc_storage instance (if registered) + if(job_requirements.adhoc_storage()) { + const auto adhoc_id = job_requirements.adhoc_storage()->id(); + auto& adhoc_manager = scord::adhoc_storage_manager::instance(); + ec = adhoc_manager.add_client_info(adhoc_id, job_info); + } + + job_id = job_info->job().id(); + } else { + LOGGER_ERROR("rpc id: {} error_msg: \"Error creating job: {}\"", rpc_id, + jm_result.error()); + ec = jm_result.error(); + } + + const auto resp = response_with_id{rpc_id, ec, job_id}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}, job_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec, job_id); + + req.respond(resp); +} void register_adhoc_storage(const request& req, const std::string& name, @@ -117,78 +168,6 @@ register_adhoc_storage(const request& req, const std::string& name, } // namespace scord::network::handlers -static void -ADM_register_job(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_register_job_in_t in; - ADM_register_job_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const admire::job_requirements reqs(&in.reqs); - const admire::job::resources job_resources(in.job_resources); - const admire::slurm_job_id slurm_id = in.slurm_job_id; - - const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{job_resources: {}, job_requirements: {}, slurm_id: " - "{}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - job_resources, reqs, slurm_id); - - admire::error_code ec = admire::error_code::success; - std::optional out_job; - auto& jm = scord::job_manager::instance(); - - if(const auto jm_result = jm.create(slurm_id, job_resources, reqs); - jm_result.has_value()) { - - const auto& job_info = jm_result.value(); - - // if the job requires an adhoc storage instance, inform the appropriate - // adhoc_storage instance (if registered) - if(reqs.adhoc_storage()) { - const auto adhoc_id = reqs.adhoc_storage()->id(); - auto& adhoc_manager = scord::adhoc_storage_manager::instance(); - ec = adhoc_manager.add_client_info(adhoc_id, job_info); - } - - out_job = 
job_info->job(); - } else { - LOGGER_ERROR("rpc id: {} error_msg: \"Error creating job: {}\"", rpc_id, - jm_result.error()); - ec = jm_result.error(); - } - - out.op_id = rpc_id; - out.retval = ec; - out.job = out_job ? admire::api::convert(*out_job).release() : nullptr; - - LOGGER_INFO("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}, job: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec, out_job); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_register_job); - - static void ADM_update_job(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index d3ac5109..2e936d23 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -26,6 +26,7 @@ #ifndef SCORD_RPC_HANDLERS_HPP #define SCORD_RPC_HANDLERS_HPP +#include #include namespace scord::network::handlers { @@ -34,6 +35,12 @@ void ping(const scord::network::request& req); void register_adhoc_storage(const request& req, const std::string& name, enum admire::adhoc_storage::type type, const admire::adhoc_storage::ctx& ctx); +void +register_job(const scord::network::request& req, + const admire::job::resources& job_resources, + const admire::job_requirements& job_requirements, + admire::slurm_job_id slurm_id); + } #include @@ -45,9 +52,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_register_job -DECLARE_MARGO_RPC_HANDLER(ADM_register_job); - /// ADM_update_job DECLARE_MARGO_RPC_HANDLER(ADM_update_job); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 84418288..6c94b387 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -33,7 +33,6 @@ #include #include -#include #include #include #include "rpc_handlers.hpp" @@ -185,6 +184,8 @@ main(int argc, char* argv[]) { daemon.set_handler("ADM_ping"s, scord::network::handlers::ping); daemon.set_handler("ADM_register_adhoc_storage"s, scord::network::handlers::register_adhoc_storage); + daemon.set_handler("ADM_register_job"s, + scord::network::handlers::register_job); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From 47bef9b1640a40d0c550b02899d5650e57acd530 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 08:28:30 +0100 Subject: [PATCH 08/23] scord: Rewrite `ADM_update_adhoc_storage` RPC --- src/lib/detail/impl.cpp | 58 +++++++++++------------- src/lib/detail/impl.hpp | 3 +- src/scord/rpc_handlers.cpp | 91 ++++++++++++++++---------------------- src/scord/rpc_handlers.hpp | 18 ++++---- src/scord/scord.cpp | 2 + 5 files changed, 77 insertions(+), 95 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 3df941b1..9d9f769a 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -494,50 +494,42 @@ transfer_datasets(const server& srv, const job& job, } admire::error_code -update_adhoc_storage(const server& srv, - const adhoc_storage::ctx& adhoc_storage_ctx, +update_adhoc_storage(const server& srv, const adhoc_storage::ctx& new_ctx, const adhoc_storage& adhoc_storage) { - (void) srv; - (void) adhoc_storage_ctx; - (void) adhoc_storage; - - return admire::error_code::snafu; - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = 
rpc_client.lookup(srv.address());
 
-    LOGGER_INFO("rpc id: {} name: {} from: {} => "
-                "body: {{adhoc_storage_id: {}}}",
-                rpc_id, std::quoted("ADM_"s + __FUNCTION__),
-                std::quoted(rpc_client.self_address()), adhoc_storage.id());
+    if(const auto& lookup_rv = rpc_client.lookup(srv.address());
+       lookup_rv.has_value()) {
+        const auto& endp = lookup_rv.value();
 
-    const auto rpc_ctx = api::convert(adhoc_storage_ctx);
+        LOGGER_INFO("rpc id: {} name: {} from: {} => "
+                    "body: {{adhoc_id: {}, new_ctx: {}}}",
+                    rpc_id, std::quoted("ADM_"s + __FUNCTION__),
+                    std::quoted(rpc_client.self_address()), adhoc_storage.id(),
+                    new_ctx);
 
-    ADM_update_adhoc_storage_in_t in{rpc_ctx.get(), adhoc_storage.id()};
-    ADM_update_adhoc_storage_out_t out;
+        if(const auto& call_rv = endp.call("ADM_"s + __FUNCTION__,
+                                           adhoc_storage.id(), new_ctx);
+           call_rv.has_value()) {
 
-    const auto rpc = endp.call("ADM_update_adhoc_storage", &in, &out);
+            const scord::network::generic_response resp{call_rv.value()};
 
-    if(const auto rv = admire::error_code{out.retval}; !rv) {
-        LOGGER_ERROR("rpc id: {} name: {} from: {} <= "
-                     "body: {{retval: {}}} [op_id: {}]",
-                     rpc_id, std::quoted("ADM_"s + __FUNCTION__),
-                     std::quoted(rpc.origin()), rv, out.op_id);
-        return rv;
-    }
+            LOGGER_EVAL(resp.error_code(), INFO, ERROR,
+                        "rpc id: {} name: {} from: {} <= "
+                        "body: {{retval: {}}} [op_id: {}]",
+                        rpc_id, std::quoted("ADM_"s + __FUNCTION__),
+                        std::quoted(endp.address()), resp.error_code(),
+                        resp.op_id());
 
-    LOGGER_INFO("rpc id: {} name: {} from: {} <= "
-                "body: {{retval: {}}} [op_id: {}]",
-                rpc_id, std::quoted("ADM_"s + __FUNCTION__),
-                std::quoted(rpc.origin()), admire::error_code::success,
-                out.op_id);
+            return resp.error_code();
+        }
+    }
 
-    return admire::error_code::success;
-#endif
+    LOGGER_ERROR("rpc call failed");
+    return admire::error_code::other;
 }
 
 admire::error_code
diff --git a/src/lib/detail/impl.hpp b/src/lib/detail/impl.hpp
index 9743c644..5e5819eb 100644
--- a/src/lib/detail/impl.hpp
+++ b/src/lib/detail/impl.hpp
@@ -59,8 +59,7 @@ register_adhoc_storage(const server& srv, const std::string& name,
                        const adhoc_storage::ctx& ctx);
 
 admire::error_code
-update_adhoc_storage(const server& srv,
-                     const adhoc_storage::ctx& adhoc_storage_ctx,
+update_adhoc_storage(const server& srv, const adhoc_storage::ctx& new_ctx,
                      const adhoc_storage& adhoc_storage);
 
 admire::error_code
diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp
index 05b437d2..7b040295 100644
--- a/src/scord/rpc_handlers.cpp
+++ b/src/scord/rpc_handlers.cpp
@@ -165,6 +165,45 @@ register_adhoc_storage(const request& req, const std::string& name,
     req.respond(resp);
 }
 
+void
+update_adhoc_storage(const request& req, std::uint64_t adhoc_id,
+                     const admire::adhoc_storage::ctx& new_ctx) {
+
+    using scord::network::get_address;
+
+    const auto rpc_name = "ADM_"s + __FUNCTION__;
+    const auto rpc_id = remote_procedure::new_id();
+
+    LOGGER_INFO("rpc id: {} name: {} from: {} => "
+                "body: {{adhoc_id: {}, new_ctx: {}}}",
+                rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)),
+                adhoc_id, new_ctx);
+
+    auto& adhoc_manager = scord::adhoc_storage_manager::instance();
+    const auto ec = adhoc_manager.update(adhoc_id, new_ctx);
+
+    if(!ec) {
+        LOGGER_ERROR(
+                "rpc id: {} error_msg: \"Error updating adhoc_storage: {}\"",
+                rpc_id, ec);
+    }
+
+    const auto resp = generic_response{rpc_id, ec};
+
+    LOGGER_INFO("rpc id: {} name: {} to: {} <= "
+                "body: {{retval: {}}}",
+                rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)),
+                ec);
+
+    req.respond(resp);
+}
+
 } // namespace scord::network::handlers
 
 
@@ -284,58 +323,6 @@ ADM_remove_job(hg_handle_t h) {
 
 DEFINE_MARGO_RPC_HANDLER(ADM_remove_job);
 
-static void
-ADM_update_adhoc_storage(hg_handle_t h) {
-
-    using scord::network::utils::get_address;
-
-    [[maybe_unused]] hg_return_t ret;
-
-    ADM_update_adhoc_storage_in_t in;
-    ADM_update_adhoc_storage_out_t out;
-
-    [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h);
-
-    ret = margo_get_input(h, &in);
-    assert(ret == HG_SUCCESS);
-
-    const admire::adhoc_storage::ctx adhoc_storage_ctx(in.adhoc_storage_ctx);
-    const std::uint64_t server_id(in.server_id);
-
-    const auto rpc_id = remote_procedure::new_id();
-    LOGGER_INFO("rpc id: {} name: {} from: {} => "
-                "body: {{adhoc_storage_id: {}}}",
-                rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)),
-                server_id);
-
-    auto& adhoc_manager = scord::adhoc_storage_manager::instance();
-    const auto ec = adhoc_manager.update(server_id, adhoc_storage_ctx);
-
-    if(!ec) {
-        LOGGER_ERROR(
-                "rpc id: {} error_msg: \"Error updating adhoc_storage: {}\"",
-                rpc_id, ec);
-    }
-
-    out.op_id = rpc_id;
-    out.retval = ec;
-
-    LOGGER_INFO("rpc id: {} name: {} to: {} => "
-                "body: {{retval: {}}}",
-                rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)),
-                ec);
-
-    ret = margo_respond(h, &out);
-    assert(ret == HG_SUCCESS);
-
-    ret = margo_free_input(h, &in);
-    assert(ret == HG_SUCCESS);
-
-    ret = margo_destroy(h);
-    assert(ret == HG_SUCCESS);
-}
-
-DEFINE_MARGO_RPC_HANDLER(ADM_update_adhoc_storage);
 
 static void
 ADM_remove_adhoc_storage(hg_handle_t h) {
diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp
index 2e936d23..49cb4371 100644
--- a/src/scord/rpc_handlers.hpp
+++ b/src/scord/rpc_handlers.hpp
@@ -22,7 +22,6 @@
  * SPDX-License-Identifier: GPL-3.0-or-later
  *****************************************************************************/
 
-// clang-format off
 #ifndef SCORD_RPC_HANDLERS_HPP
 #define SCORD_RPC_HANDLERS_HPP
 
@@ -31,9 +30,15 @@
 
 namespace scord::network::handlers {
 
-void ping(const scord::network::request& req);
-void register_adhoc_storage(const request& req, const std::string& name,
-enum admire::adhoc_storage::type type, const admire::adhoc_storage::ctx& ctx);
+void
+ping(const scord::network::request& req);
+void
+register_adhoc_storage(const request& req, const std::string& name,
+                       enum admire::adhoc_storage::type type,
+                       const admire::adhoc_storage::ctx& ctx);
+void
+update_adhoc_storage(const request& req, std::uint64_t adhoc_id,
+                     const admire::adhoc_storage::ctx& new_ctx);
 
 void
 register_job(const scord::network::request& req,
@@ -41,7 +46,7 @@ register_job(const scord::network::request& req,
              const admire::job_requirements& job_requirements,
              admire::slurm_job_id slurm_id);
 
-}
+} // namespace scord::network::handlers
 
 #include 
 
@@ -61,9 +66,6 @@ DECLARE_MARGO_RPC_HANDLER(ADM_update_job);
 /// ADM_remove_job
 DECLARE_MARGO_RPC_HANDLER(ADM_remove_job);
 
-/// ADM_update_adhoc_storage
-DECLARE_MARGO_RPC_HANDLER(ADM_update_adhoc_storage);
-
 /// ADM_remove_adhoc_storage
 DECLARE_MARGO_RPC_HANDLER(ADM_remove_adhoc_storage);
 
diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp
index 6c94b387..8d231d8e 100644
--- a/src/scord/scord.cpp
+++ b/src/scord/scord.cpp
@@ -184,6 +184,8 @@ main(int argc, char* argv[]) {
 
     daemon.set_handler("ADM_ping"s, scord::network::handlers::ping);
     daemon.set_handler("ADM_register_adhoc_storage"s,
scord::network::handlers::register_adhoc_storage); + daemon.set_handler("ADM_update_adhoc_storage"s, + scord::network::handlers::update_adhoc_storage); daemon.set_handler("ADM_register_job"s, scord::network::handlers::register_job); -- GitLab From be8d7d1d030378387e4fcdb0361bda78a2b6ff3e Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 08:58:14 +0100 Subject: [PATCH 09/23] scord: Rewrite `ADM_remove_adhoc_storage` RPC --- src/lib/detail/impl.cpp | 50 +++++++++++------------- src/scord/rpc_handlers.cpp | 80 +++++++++++++------------------------- src/scord/rpc_handlers.hpp | 6 +-- src/scord/scord.cpp | 2 + 4 files changed, 55 insertions(+), 83 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 9d9f769a..8bbae8a8 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -535,42 +535,38 @@ update_adhoc_storage(const server& srv, const adhoc_storage::ctx& new_ctx, admire::error_code remove_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { - (void) srv; - (void) adhoc_storage; + scord::network::client rpc_client{srv.protocol()}; - return admire::error_code::snafu; + const auto rpc_id = ::api::remote_procedure::new_id(); -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{adhoc_id: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), adhoc_storage.id()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{adhoc_storage_id: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), adhoc_storage.id()); + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, adhoc_storage.id()); + call_rv.has_value()) { - ADM_remove_adhoc_storage_in_t in{adhoc_storage.id()}; - ADM_remove_adhoc_storage_out_t out; + const scord::network::generic_response resp{call_rv.value()}; - const auto rpc = endp.call("ADM_remove_adhoc_storage", &in, &out); + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return rv; + return resp.error_code(); + } } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, - out.op_id); - return admire::error_code::success; -#endif + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } tl::expected diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 7b040295..013fd03f 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -197,9 +197,34 @@ update_adhoc_storage(const request& req, std::uint64_t adhoc_id, req.respond(resp); } + +void +remove_adhoc_storage(const request& req, std::uint64_t adhoc_id) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + 
const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{adhoc_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + adhoc_id); + + auto& adhoc_manager = scord::adhoc_storage_manager::instance(); + admire::error_code ec = adhoc_manager.remove(adhoc_id); + + if(!ec) { + LOGGER_ERROR("rpc id: {} error_msg: \"Error removing job: {}\"", rpc_id, + adhoc_id); + } + + const auto resp = generic_response{rpc_id, ec}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), - std::quoted(get_address(req)), ec); + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec); req.respond(resp); } @@ -323,57 +348,6 @@ ADM_remove_job(hg_handle_t h) { DEFINE_MARGO_RPC_HANDLER(ADM_remove_job); - -static void -ADM_remove_adhoc_storage(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_remove_adhoc_storage_in_t in; - ADM_remove_adhoc_storage_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{adhoc_storage_id: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - in.server_id); - - - auto& adhoc_manager = scord::adhoc_storage_manager::instance(); - admire::error_code ec = adhoc_manager.remove(in.server_id); - - if(!ec) { - LOGGER_ERROR("rpc id: {} error_msg: \"Error removing job: {}\"", rpc_id, - in.server_id); - } - - out.op_id = rpc_id; - out.retval = ec; - - LOGGER_INFO("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_remove_adhoc_storage); - static void ADM_deploy_adhoc_storage(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 49cb4371..129fcdbe 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -40,6 +40,9 @@ void update_adhoc_storage(const request& req, std::uint64_t adhoc_id, const admire::adhoc_storage::ctx& new_ctx); +void +remove_adhoc_storage(const request& req, std::uint64_t adhoc_id); + void register_job(const scord::network::request& req, const admire::job::resources& job_resources, @@ -63,9 +66,6 @@ DECLARE_MARGO_RPC_HANDLER(ADM_update_job); /// ADM_remove_job DECLARE_MARGO_RPC_HANDLER(ADM_remove_job); -/// ADM_remove_adhoc_storage -DECLARE_MARGO_RPC_HANDLER(ADM_remove_adhoc_storage); - /// ADM_deploy_adhoc_storage DECLARE_MARGO_RPC_HANDLER(ADM_deploy_adhoc_storage); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 8d231d8e..942f9e2d 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -186,6 +186,8 @@ main(int argc, char* argv[]) { scord::network::handlers::register_adhoc_storage); daemon.set_handler("ADM_update_adhoc_storage"s, scord::network::handlers::update_adhoc_storage); + daemon.set_handler("ADM_remove_adhoc_storage"s, + scord::network::handlers::remove_adhoc_storage); daemon.set_handler("ADM_register_job"s, scord::network::handlers::register_job); -- GitLab From 44beab2ad1adef15d4c88f81ce3627c78f8c9d33 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 09:30:53 
+0100 Subject: [PATCH 10/23] scord: Rewrite `ADM_deploy_adhoc_storage` RPC --- src/lib/detail/impl.cpp | 51 ++++----- src/scord/rpc_handlers.cpp | 219 +++++++++++++++++-------------------- src/scord/rpc_handlers.hpp | 6 +- src/scord/scord.cpp | 2 + 4 files changed, 128 insertions(+), 150 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 8bbae8a8..f99c79d2 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -393,43 +393,38 @@ register_adhoc_storage(const server& srv, const std::string& name, admire::error_code deploy_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { - (void) srv; - (void) adhoc_storage; + scord::network::client rpc_client{srv.protocol()}; - return admire::error_code::snafu; + const auto rpc_id = ::api::remote_procedure::new_id(); -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{adhoc_id: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), adhoc_storage.id()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{adhoc_id: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), adhoc_storage.id()); + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, adhoc_storage.id()); + call_rv.has_value()) { - ADM_deploy_adhoc_storage_in_t in{adhoc_storage.id()}; - ADM_deploy_adhoc_storage_out_t out; + const scord::network::generic_response resp{call_rv.value()}; - const auto rpc = endp.call("ADM_deploy_adhoc_storage", &in, &out); + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), rv, out.op_id); - return rv; + return resp.error_code(); + } } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), - admire::error_code::success, out.op_id); - - return admire::error_code::success; -#endif + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } tl::expected diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 013fd03f..3b77df07 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -229,6 +229,106 @@ remove_adhoc_storage(const request& req, std::uint64_t adhoc_id) { req.respond(resp); } +void +deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{adhoc_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + adhoc_id); + + auto ec = admire::error_code::success; + auto& adhoc_manager = scord::adhoc_storage_manager::instance(); + + if(const auto am_result = 
adhoc_manager.find(adhoc_id); + am_result.has_value()) { + const auto& storage_info = am_result.value(); + const auto adhoc_storage = storage_info->adhoc_storage(); + + if(adhoc_storage.type() == admire::adhoc_storage::type::gekkofs) { + const auto adhoc_ctx = adhoc_storage.context(); + /* Number of nodes */ + const std::string nodes = + std::to_string(adhoc_ctx.resources().nodes().size()); + + /* Walltime */ + const std::string walltime = std::to_string(adhoc_ctx.walltime()); + + /* Launch script */ + switch(const auto pid = fork()) { + case 0: { + std::vector args; + args.push_back("gkfs"); + // args.push_back("-c"); + // args.push_back("gkfs.conf"); + args.push_back("-n"); + args.push_back(nodes.c_str()); + // args.push_back("-w"); + // args.push_back(walltime.c_str()); + args.push_back("--srun"); + args.push_back("start"); + args.push_back(NULL); + std::vector env; + env.push_back(NULL); + + execvpe("gkfs", const_cast(args.data()), + const_cast(env.data())); + LOGGER_INFO( + "ADM_deploy_adhoc_storage() script didn't execute"); + exit(EXIT_FAILURE); + break; + } + case -1: { + ec = admire::error_code::other; + LOGGER_ERROR("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}}}", + rpc_id, std::quoted(rpc_name), + std::quoted(get_address(req)), ec); + break; + } + default: { + int wstatus = 0; + pid_t retwait = waitpid(pid, &wstatus, 0); + if(retwait == -1) { + LOGGER_ERROR( + "rpc id: {} error_msg: \"Error waitpid code: {}\"", + rpc_id, retwait); + ec = admire::error_code::other; + } else { + if(WEXITSTATUS(wstatus) != 0) { + ec = admire::error_code::other; + } else { + ec = admire::error_code::success; + } + } + break; + } + } + } + + } else { + ec = am_result.error(); + LOGGER_ERROR("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}}}", + rpc_id, std::quoted(rpc_name), + std::quoted(get_address(req)), ec); + } + + const auto resp = generic_response{rpc_id, ec}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec); + + req.respond(resp); +} + } // namespace scord::network::handlers @@ -348,125 +448,6 @@ ADM_remove_job(hg_handle_t h) { DEFINE_MARGO_RPC_HANDLER(ADM_remove_job); -static void -ADM_deploy_adhoc_storage(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_deploy_adhoc_storage_in_t in; - ADM_deploy_adhoc_storage_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - - const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{adhoc_id: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - in.id); - - auto ec = admire::error_code::success; - auto& adhoc_manager = scord::adhoc_storage_manager::instance(); - - if(const auto am_result = adhoc_manager.find(in.id); - am_result.has_value()) { - const auto& storage_info = am_result.value(); - const auto adhoc_storage = storage_info->adhoc_storage(); - - if(adhoc_storage.type() == admire::adhoc_storage::type::gekkofs) { - const auto adhoc_ctx = adhoc_storage.context(); - /* Number of nodes */ - const std::string nodes = - std::to_string(adhoc_ctx.resources().nodes().size()); - - /* Walltime */ - const std::string walltime = std::to_string(adhoc_ctx.walltime()); - - /* Launch script */ - switch(const auto pid = fork()) { - case 0: { - std::vector args; - args.push_back("gkfs"); - // args.push_back("-c"); - // 
args.push_back("gkfs.conf"); - args.push_back("-n"); - args.push_back(nodes.c_str()); - // args.push_back("-w"); - // args.push_back(walltime.c_str()); - args.push_back("--srun"); - args.push_back("start"); - args.push_back(NULL); - std::vector env; - env.push_back(NULL); - - execvpe("gkfs", const_cast(args.data()), - const_cast(env.data())); - LOGGER_INFO( - "ADM_deploy_adhoc_storage() script didn't execute"); - exit(EXIT_FAILURE); - break; - } - case -1: { - ec = admire::error_code::other; - LOGGER_ERROR("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), - std::quoted(get_address(h)), ec); - break; - } - default: { - int wstatus = 0; - pid_t retwait = waitpid(pid, &wstatus, 0); - if(retwait == -1) { - LOGGER_ERROR( - "rpc id: {} error_msg: \"Error waitpid code: {}\"", - rpc_id, retwait); - ec = admire::error_code::other; - } else { - if(WEXITSTATUS(wstatus) != 0) { - ec = admire::error_code::other; - } else { - ec = admire::error_code::success; - } - } - break; - } - } - } - - } else { - ec = am_result.error(); - LOGGER_ERROR("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), - std::quoted(get_address(h)), ec); - } - - out.op_id = rpc_id; - out.retval = ec; - - LOGGER_INFO("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_deploy_adhoc_storage); - static void ADM_register_pfs_storage(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 129fcdbe..754bac54 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -43,6 +43,9 @@ update_adhoc_storage(const request& req, std::uint64_t adhoc_id, void remove_adhoc_storage(const request& req, std::uint64_t adhoc_id); +void +deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id); + void register_job(const scord::network::request& req, const admire::job::resources& job_resources, @@ -66,9 +69,6 @@ DECLARE_MARGO_RPC_HANDLER(ADM_update_job); /// ADM_remove_job DECLARE_MARGO_RPC_HANDLER(ADM_remove_job); -/// ADM_deploy_adhoc_storage -DECLARE_MARGO_RPC_HANDLER(ADM_deploy_adhoc_storage); - /// ADM_register_pfs_storage DECLARE_MARGO_RPC_HANDLER(ADM_register_pfs_storage); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 942f9e2d..65f5ae8f 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -188,6 +188,8 @@ main(int argc, char* argv[]) { scord::network::handlers::update_adhoc_storage); daemon.set_handler("ADM_remove_adhoc_storage"s, scord::network::handlers::remove_adhoc_storage); + daemon.set_handler("ADM_deploy_adhoc_storage"s, + scord::network::handlers::deploy_adhoc_storage); daemon.set_handler("ADM_register_job"s, scord::network::handlers::register_job); -- GitLab From 77f1203daa1437ec6f990b8ee761d5eced3afa7a Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 09:40:35 +0100 Subject: [PATCH 11/23] scord: Rewrite `ADM_update_job` RPC --- src/lib/detail/impl.cpp | 56 +++++++++++-------------- src/lib/detail/impl.hpp | 2 +- src/scord/rpc_handlers.cpp | 85 ++++++++++++++------------------------ src/scord/rpc_handlers.hpp | 7 ++-- src/scord/scord.cpp | 2 + 5 files changed, 63 insertions(+), 89 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp 
index f99c79d2..e129110f 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -259,48 +259,40 @@ register_job(const server& srv, const job::resources& job_resources, admire::error_code update_job(const server& srv, const job& job, - const job::resources& job_resources) { + const job::resources& new_resources) { - (void) srv; - (void) job; - (void) job_resources; - - return admire::error_code::snafu; - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{job: {}, job_resources: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), job, job_resources); + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_job = api::convert(job); - const auto rpc_job_resources = api::convert(job_resources); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{job_id: {}, new_resources: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), job.id(), + new_resources); - ADM_update_job_in_t in{rpc_job.get(), rpc_job_resources.get()}; - ADM_update_job_out_t out; + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, job.id(), new_resources); + call_rv.has_value()) { - const auto rpc = endp.call("ADM_update_job", &in, &out); + const scord::network::generic_response resp{call_rv.value()}; - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return rv; + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); + return resp.error_code(); + } } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, - out.op_id); - return admire::error_code::success; -#endif + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } admire::error_code diff --git a/src/lib/detail/impl.hpp b/src/lib/detail/impl.hpp index 5e5819eb..59a2daf4 100644 --- a/src/lib/detail/impl.hpp +++ b/src/lib/detail/impl.hpp @@ -41,7 +41,7 @@ register_job(const server& srv, const job::resources& job_resources, admire::error_code update_job(const server& srv, const job& job, - const job::resources& job_resources); + const job::resources& new_resources); admire::error_code remove_job(const server& srv, const job& job); diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 3b77df07..9ed3cc55 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -125,6 +125,38 @@ register_job(const scord::network::request& req, req.respond(resp); } +void +update_job(const request& req, admire::job_id job_id, + const admire::job::resources& new_resources) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{job_id: {}, new_resources: 
{}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + job_id, new_resources); + + auto& jm = scord::job_manager::instance(); + const auto ec = jm.update(job_id, new_resources); + + if(!ec) { + LOGGER_ERROR("rpc id: {} error_msg: \"Error updating job: {}\"", rpc_id, + ec); + } + + const auto resp = generic_response{rpc_id, ec}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec); + + req.respond(resp); +} + void register_adhoc_storage(const request& req, const std::string& name, enum admire::adhoc_storage::type type, @@ -332,59 +364,6 @@ deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id) { } // namespace scord::network::handlers -static void -ADM_update_job(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_update_job_in_t in; - ADM_update_job_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const admire::job job(in.job); - const admire::job::resources job_resources(in.job_resources); - - const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{job: {}, job_resources: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - job, job_resources); - - auto& jm = scord::job_manager::instance(); - const auto ec = jm.update(job.id(), job_resources); - - if(ec != ADM_SUCCESS) { - LOGGER_ERROR("rpc id: {} error_msg: \"Error updating job: {}\"", rpc_id, - ec); - } - - out.op_id = rpc_id; - out.retval = ec; - - LOGGER_INFO("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_update_job); - - static void ADM_remove_job(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 754bac54..ee536e2a 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -52,6 +52,10 @@ register_job(const scord::network::request& req, const admire::job_requirements& job_requirements, admire::slurm_job_id slurm_id); +void +update_job(const request& req, admire::job_id job_id, + const admire::job::resources& new_resources); + } // namespace scord::network::handlers #include @@ -63,9 +67,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_update_job -DECLARE_MARGO_RPC_HANDLER(ADM_update_job); - /// ADM_remove_job DECLARE_MARGO_RPC_HANDLER(ADM_remove_job); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 65f5ae8f..0d6dd76b 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -192,6 +192,8 @@ main(int argc, char* argv[]) { scord::network::handlers::deploy_adhoc_storage); daemon.set_handler("ADM_register_job"s, scord::network::handlers::register_job); + daemon.set_handler("ADM_update_job"s, + scord::network::handlers::update_job); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From a99302fdf5e5e581025d317561b0b8a33551635f Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 11:11:48 +0100 Subject: [PATCH 12/23] scord: Rewrite `ADM_remove_job` RPC --- src/lib/detail/impl.cpp | 51 ++++++++---------- 
src/scord/rpc_handlers.cpp | 106 +++++++++++++++---------------------- src/scord/rpc_handlers.hpp | 6 +-- src/scord/scord.cpp | 2 + 4 files changed, 69 insertions(+), 96 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index e129110f..be943788 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -298,45 +298,36 @@ update_job(const server& srv, const job& job, admire::error_code remove_job(const server& srv, const job& job) { - (void) srv; - (void) job; - - return admire::error_code::snafu; - -#if 0 - - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{job: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), job); + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_job = api::convert(job); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{job_id: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), job.id()); - ADM_remove_job_in_t in{rpc_job.get()}; - ADM_remove_job_out_t out; + if(const auto& call_rv = endp.call("ADM_"s + __FUNCTION__, job.id()); + call_rv.has_value()) { - const auto rpc = endp.call("ADM_remove_job", &in, &out); + const scord::network::generic_response resp{call_rv.value()}; - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return rv; + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); + return resp.error_code(); + } } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, - out.op_id); - return admire::error_code::success; -#endif + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } tl::expected diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 9ed3cc55..ddd11634 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -157,6 +157,49 @@ update_job(const request& req, admire::job_id job_id, req.respond(resp); } +void +remove_job(const request& req, admire::job_id job_id) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{job_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + job_id); + + admire::error_code ec; + auto& jm = scord::job_manager::instance(); + const auto jm_result = jm.remove(job_id); + + if(jm_result) { + // if the job was using an adhoc storage instance, inform the + // appropriate adhoc_storage that the job is no longer its client + const auto& job_info = jm_result.value(); + + if(const auto adhoc_storage = job_info->requirements()->adhoc_storage(); + adhoc_storage.has_value()) { + auto& adhoc_manager = scord::adhoc_storage_manager::instance(); + ec 
= adhoc_manager.remove_client_info(adhoc_storage->id()); + } + } else { + LOGGER_ERROR("rpc id: {} error_msg: \"Error removing job: {}\"", rpc_id, + job_id); + ec = jm_result.error(); + } + + const auto resp = generic_response{rpc_id, ec}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec); + + req.respond(resp); +} + void register_adhoc_storage(const request& req, const std::string& name, enum admire::adhoc_storage::type type, @@ -364,69 +407,6 @@ deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id) { } // namespace scord::network::handlers -static void -ADM_remove_job(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_remove_job_in_t in; - ADM_remove_job_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const admire::job job(in.job); - - const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{job: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - job); - - admire::error_code ec; - auto& jm = scord::job_manager::instance(); - const auto jm_result = jm.remove(job.id()); - - if(jm_result) { - // if the job was using an adhoc storage instance, inform the - // appropriate adhoc_storage that the job is no longer its client - const auto& job_info = jm_result.value(); - - if(const auto adhoc_storage = job_info->requirements()->adhoc_storage(); - adhoc_storage.has_value()) { - auto& adhoc_manager = scord::adhoc_storage_manager::instance(); - ec = adhoc_manager.remove_client_info(adhoc_storage->id()); - } - } else { - LOGGER_ERROR("rpc id: {} error_msg: \"Error removing job: {}\"", rpc_id, - job.id()); - ec = jm_result.error(); - } - - out.op_id = rpc_id; - out.retval = ec; - - LOGGER_INFO("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_remove_job); - static void ADM_register_pfs_storage(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index ee536e2a..08c658e1 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -56,6 +56,9 @@ void update_job(const request& req, admire::job_id job_id, const admire::job::resources& new_resources); +void +remove_job(const request& req, admire::job_id job_id); + } // namespace scord::network::handlers #include @@ -67,9 +70,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_remove_job -DECLARE_MARGO_RPC_HANDLER(ADM_remove_job); - /// ADM_register_pfs_storage DECLARE_MARGO_RPC_HANDLER(ADM_register_pfs_storage); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 0d6dd76b..d341d73d 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -194,6 +194,8 @@ main(int argc, char* argv[]) { scord::network::handlers::register_job); daemon.set_handler("ADM_update_job"s, scord::network::handlers::update_job); + daemon.set_handler("ADM_remove_job"s, + scord::network::handlers::remove_job); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From 
ddf1a4d6c019d3c4f0cf9eabec61eb15604b1c0f Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 11:21:44 +0100 Subject: [PATCH 13/23] Fix formatting issues --- src/lib/detail/impl.cpp | 196 ++++++++++++++++++------------------- src/lib/detail/impl.hpp | 14 +-- src/scord/rpc_handlers.hpp | 27 ++--- src/scord/scord.cpp | 12 +-- 4 files changed, 125 insertions(+), 124 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index be943788..c9d56101 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -373,104 +373,6 @@ register_adhoc_storage(const server& srv, const std::string& name, return tl::make_unexpected(admire::error_code::other); } -admire::error_code -deploy_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { - - scord::network::client rpc_client{srv.protocol()}; - - const auto rpc_id = ::api::remote_procedure::new_id(); - - if(const auto& lookup_rv = rpc_client.lookup(srv.address()); - lookup_rv.has_value()) { - const auto& endp = lookup_rv.value(); - - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{adhoc_id: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), adhoc_storage.id()); - - if(const auto& call_rv = - endp.call("ADM_"s + __FUNCTION__, adhoc_storage.id()); - call_rv.has_value()) { - - const scord::network::generic_response resp{call_rv.value()}; - - LOGGER_EVAL(resp.error_code(), INFO, ERROR, - "rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(endp.address()), resp.error_code(), - resp.op_id()); - - return resp.error_code(); - } - } - - LOGGER_ERROR("rpc call failed"); - return admire::error_code::other; -} - -tl::expected -transfer_datasets(const server& srv, const job& job, - const std::vector& sources, - const std::vector& targets, - const std::vector& limits, - transfer::mapping mapping) { - - (void) srv; - (void) job; - (void) sources; - (void) targets; - (void) limits; - (void) mapping; - - return tl::make_unexpected(admire::error_code::snafu); - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; - - const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - - LOGGER_INFO( - "rpc id: {} name: {} from: {} => " - "body: {{job: {}, sources: {}, targets: {}, limits: {}, mapping: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), job, sources, targets, - limits, mapping); - - const auto rpc_job = api::convert(job); - const auto rpc_sources = api::convert(sources); - const auto rpc_targets = api::convert(targets); - const auto rpc_qos_limits = api::convert(limits); - - ADM_transfer_datasets_in_t in{rpc_job.get(), rpc_sources.get(), - rpc_targets.get(), rpc_qos_limits.get(), - static_cast(mapping)}; - ADM_transfer_datasets_out_t out; - - [[maybe_unused]] const auto rpc = - endp.call("ADM_transfer_datasets", &in, &out); - - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return tl::make_unexpected(rv); - } - - const admire::transfer tx = api::convert(out.tx); - - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}, transfer: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), 
admire::error_code::success, tx, - out.op_id); - return tx; -#endif -} - admire::error_code update_adhoc_storage(const server& srv, const adhoc_storage::ctx& new_ctx, const adhoc_storage& adhoc_storage) { @@ -686,4 +588,102 @@ remove_pfs_storage(const server& srv, const pfs_storage& pfs_storage) { #endif } +admire::error_code +deploy_adhoc_storage(const server& srv, const adhoc_storage& adhoc_storage) { + + scord::network::client rpc_client{srv.protocol()}; + + const auto rpc_id = ::api::remote_procedure::new_id(); + + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); + + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{adhoc_id: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), adhoc_storage.id()); + + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, adhoc_storage.id()); + call_rv.has_value()) { + + const scord::network::generic_response resp{call_rv.value()}; + + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); + + return resp.error_code(); + } + } + + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; +} + +tl::expected +transfer_datasets(const server& srv, const job& job, + const std::vector& sources, + const std::vector& targets, + const std::vector& limits, + transfer::mapping mapping) { + + (void) srv; + (void) job; + (void) sources; + (void) targets; + (void) limits; + (void) mapping; + + return tl::make_unexpected(admire::error_code::snafu); + +#if 0 + scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + + const auto rpc_id = ::api::remote_procedure::new_id(); + auto endp = rpc_client.lookup(srv.address()); + + LOGGER_INFO( + "rpc id: {} name: {} from: {} => " + "body: {{job: {}, sources: {}, targets: {}, limits: {}, mapping: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), job, sources, targets, + limits, mapping); + + const auto rpc_job = api::convert(job); + const auto rpc_sources = api::convert(sources); + const auto rpc_targets = api::convert(targets); + const auto rpc_qos_limits = api::convert(limits); + + ADM_transfer_datasets_in_t in{rpc_job.get(), rpc_sources.get(), + rpc_targets.get(), rpc_qos_limits.get(), + static_cast(mapping)}; + ADM_transfer_datasets_out_t out; + + [[maybe_unused]] const auto rpc = + endp.call("ADM_transfer_datasets", &in, &out); + + if(const auto rv = admire::error_code{out.retval}; !rv) { + LOGGER_ERROR("rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc.origin()), rv, out.op_id); + return tl::make_unexpected(rv); + } + + const admire::transfer tx = api::convert(out.tx); + + LOGGER_INFO("rpc id: {} name: {} from: {} <= " + "body: {{retval: {}, transfer: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc.origin()), admire::error_code::success, tx, + out.op_id); + return tx; +#endif +} + } // namespace admire::detail diff --git a/src/lib/detail/impl.hpp b/src/lib/detail/impl.hpp index 59a2daf4..75b42dc3 100644 --- a/src/lib/detail/impl.hpp +++ b/src/lib/detail/impl.hpp @@ -46,13 +46,6 @@ update_job(const server& srv, const job& job, admire::error_code remove_job(const server& srv, const job& job); -tl::expected 
-transfer_datasets(const server& srv, const job& job, - const std::vector& sources, - const std::vector& targets, - const std::vector& limits, - transfer::mapping mapping); - tl::expected register_adhoc_storage(const server& srv, const std::string& name, enum adhoc_storage::type type, @@ -79,6 +72,13 @@ update_pfs_storage(const server& srv, const pfs_storage& pfs_storage, admire::error_code remove_pfs_storage(const server& srv, const pfs_storage& pfs_storage); +tl::expected +transfer_datasets(const server& srv, const job& job, + const std::vector& sources, + const std::vector& targets, + const std::vector& limits, + transfer::mapping mapping); + } // namespace admire::detail #endif // SCORD_ADMIRE_IMPL_HPP diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 08c658e1..ba11ad0e 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -32,19 +32,6 @@ namespace scord::network::handlers { void ping(const scord::network::request& req); -void -register_adhoc_storage(const request& req, const std::string& name, - enum admire::adhoc_storage::type type, - const admire::adhoc_storage::ctx& ctx); -void -update_adhoc_storage(const request& req, std::uint64_t adhoc_id, - const admire::adhoc_storage::ctx& new_ctx); - -void -remove_adhoc_storage(const request& req, std::uint64_t adhoc_id); - -void -deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id); void register_job(const scord::network::request& req, @@ -59,6 +46,20 @@ update_job(const request& req, admire::job_id job_id, void remove_job(const request& req, admire::job_id job_id); +void +register_adhoc_storage(const request& req, const std::string& name, + enum admire::adhoc_storage::type type, + const admire::adhoc_storage::ctx& ctx); +void +update_adhoc_storage(const request& req, std::uint64_t adhoc_id, + const admire::adhoc_storage::ctx& new_ctx); + +void +remove_adhoc_storage(const request& req, std::uint64_t adhoc_id); + +void +deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id); + } // namespace scord::network::handlers #include diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index d341d73d..2cb7a75f 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -182,6 +182,12 @@ main(int argc, char* argv[]) { scord::network::server daemon(cfg); daemon.set_handler("ADM_ping"s, scord::network::handlers::ping); + daemon.set_handler("ADM_register_job"s, + scord::network::handlers::register_job); + daemon.set_handler("ADM_update_job"s, + scord::network::handlers::update_job); + daemon.set_handler("ADM_remove_job"s, + scord::network::handlers::remove_job); daemon.set_handler("ADM_register_adhoc_storage"s, scord::network::handlers::register_adhoc_storage); daemon.set_handler("ADM_update_adhoc_storage"s, @@ -190,12 +196,6 @@ main(int argc, char* argv[]) { scord::network::handlers::remove_adhoc_storage); daemon.set_handler("ADM_deploy_adhoc_storage"s, scord::network::handlers::deploy_adhoc_storage); - daemon.set_handler("ADM_register_job"s, - scord::network::handlers::register_job); - daemon.set_handler("ADM_update_job"s, - scord::network::handlers::update_job); - daemon.set_handler("ADM_remove_job"s, - scord::network::handlers::remove_job); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From cbea50e93603d0b4a62753ceeccb14d073cb2faf Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 14:17:43 +0100 Subject: [PATCH 14/23] scord: Rewrite `ADM_register_pfs_storage` RPC --- src/common/api/admire_types.hpp | 16 +++++++++ src/common/api/types.cpp | 47 
++++++++++++++++++++++++ src/common/net/serialization.hpp | 23 ++++++++++++ src/lib/detail/impl.cpp | 61 ++++++++++++++------------------ src/scord/rpc_handlers.cpp | 60 ++++++++++--------------------- src/scord/rpc_handlers.hpp | 8 +++-- src/scord/scord.cpp | 2 ++ 7 files changed, 138 insertions(+), 79 deletions(-) diff --git a/src/common/api/admire_types.hpp b/src/common/api/admire_types.hpp index 395bb5e1..664d5f83 100644 --- a/src/common/api/admire_types.hpp +++ b/src/common/api/admire_types.hpp @@ -472,6 +472,8 @@ struct pfs_storage { struct ctx { + ctx() = default; + explicit ctx(std::filesystem::path mount_point); explicit ctx(ADM_pfs_context_t ctx); @@ -479,10 +481,18 @@ struct pfs_storage { std::filesystem::path mount_point() const; + template + void + serialize(Archive&& ar) { + ar& m_mount_point; + } + private: std::filesystem::path m_mount_point; }; + pfs_storage(); + pfs_storage(enum pfs_storage::type type, std::string name, std::uint64_t id, std::filesystem::path mount_point); @@ -511,6 +521,12 @@ struct pfs_storage { void update(admire::pfs_storage::ctx new_ctx); + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; diff --git a/src/common/api/types.cpp b/src/common/api/types.cpp index 6bc9624a..dc83c93d 100644 --- a/src/common/api/types.cpp +++ b/src/common/api/types.cpp @@ -1664,6 +1664,7 @@ pfs_storage::ctx::mount_point() const { class pfs_storage::impl { public: + impl() = default; explicit impl(enum pfs_storage::type type, std::string name, std::uint64_t id, pfs_storage::ctx ctx) : m_type(type), m_name(std::move(name)), m_id(id), @@ -1701,6 +1702,24 @@ public: m_ctx = std::move(new_ctx); } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_type)); + ar(SCORD_SERIALIZATION_NVP(m_name)); + ar(SCORD_SERIALIZATION_NVP(m_id)); + ar(SCORD_SERIALIZATION_NVP(m_ctx)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_type)); + ar(SCORD_SERIALIZATION_NVP(m_name)); + ar(SCORD_SERIALIZATION_NVP(m_id)); + ar(SCORD_SERIALIZATION_NVP(m_ctx)); + } + private: enum type m_type; std::string m_name; @@ -1708,6 +1727,8 @@ private: pfs_storage::ctx m_ctx; }; +pfs_storage::pfs_storage() = default; + pfs_storage::pfs_storage(enum pfs_storage::type type, std::string name, std::uint64_t id, std::filesystem::path mount_point) : m_pimpl(std::make_unique( @@ -1764,6 +1785,32 @@ pfs_storage::update(admire::pfs_storage::ctx new_ctx) { return m_pimpl->update(std::move(new_ctx)); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +pfs_storage::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +pfs_storage::impl::save( + scord::network::serialization::output_archive&) const; + +template void +pfs_storage::impl::load( + scord::network::serialization::input_archive&); + +template void +pfs_storage::serialize( + scord::network::serialization::output_archive&); + +template void +pfs_storage::serialize( + scord::network::serialization::input_archive&); + class job_requirements::impl { public: diff --git a/src/common/net/serialization.hpp b/src/common/net/serialization.hpp index c8c731d0..7eefdd67 100644 --- a/src/common/net/serialization.hpp +++ b/src/common/net/serialization.hpp @@ -33,6 +33,29 @@ 
#include #include +// Cereal does not serialize std::filesystem::path's by default +#include + +namespace cereal { + +//! Loading for std::filesystem::path +template +inline void +CEREAL_LOAD_FUNCTION_NAME(Archive& ar, std::filesystem::path& out) { + std::string tmp; + ar(CEREAL_NVP_("data", tmp)); + out.assign(tmp); +} + +//! Saving for std::filesystem::path +template +inline void +CEREAL_SAVE_FUNCTION_NAME(Archive& ar, const std::filesystem::path& in) { + ar(CEREAL_NVP_("data", in.string())); +} + +} // namespace cereal + namespace scord::network::serialization { #define SCORD_SERIALIZATION_NVP CEREAL_NVP diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index c9d56101..529008b9 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -453,51 +453,42 @@ tl::expected register_pfs_storage(const server& srv, const std::string& name, enum pfs_storage::type type, const pfs_storage::ctx& ctx) { - (void) srv; - (void) name; - (void) type; - (void) ctx; - - return tl::make_unexpected(admire::error_code::snafu); - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{name: {}, type: {}, pfs_ctx: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), name, type, ctx); + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_name = name.c_str(); - const auto rpc_type = static_cast(type); - const auto rpc_ctx = api::convert(ctx); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{name: {}, type: {}, pfs_ctx: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), name, type, ctx); - ADM_register_pfs_storage_in_t in{rpc_name, rpc_type, rpc_ctx.get()}; - ADM_register_pfs_storage_out_t out; + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, name, type, ctx); + call_rv.has_value()) { - const auto rpc = endp.call("ADM_register_pfs_storage", &in, &out); + const scord::network::response_with_id resp{call_rv.value()}; - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), rv, out.op_id); - return tl::make_unexpected(rv); - } + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}, pfs_id: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.value(), resp.op_id()); - auto rpc_pfs_storage = admire::pfs_storage{type, name, out.id, ctx}; + if(const auto ec = resp.error_code(); !ec) { + return tl::make_unexpected(ec); + } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}, id: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), - admire::error_code::success, out.id, out.op_id); + return admire::pfs_storage{type, name, resp.value(), ctx}; + } + } - return rpc_pfs_storage; -#endif + LOGGER_ERROR("rpc call failed"); + return tl::make_unexpected(admire::error_code::other); } admire::error_code diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index ddd11634..552d2a15 
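The load/save overloads added to serialization.hpp above exist because cereal has no built-in archiver for std::filesystem::path, so the mount_point member of pfs_storage::ctx is round-tripped through a plain string. Below is a standalone sketch of the same idea; it uses cereal's stock binary archives rather than the project's input_archive/output_archive aliases, and the path value is made up for illustration.

    // standalone sketch: round-trip a std::filesystem::path through cereal
    #include <cassert>
    #include <filesystem>
    #include <sstream>
    #include <string>
    #include <cereal/archives/binary.hpp>
    #include <cereal/types/string.hpp>

    namespace cereal {

    // same idea as the overloads in serialization.hpp: store the path as text
    template <class Archive>
    void
    save(Archive& ar, const std::filesystem::path& in) {
        const std::string tmp = in.string();
        ar(tmp);
    }

    template <class Archive>
    void
    load(Archive& ar, std::filesystem::path& out) {
        std::string tmp;
        ar(tmp);
        out.assign(tmp);
    }

    } // namespace cereal

    int main() {
        std::stringstream ss;
        const std::filesystem::path original{"/mnt/lustre"};
        {
            cereal::BinaryOutputArchive oar{ss};
            oar(original);
        }
        std::filesystem::path restored;
        cereal::BinaryInputArchive iar{ss};
        iar(restored);
        assert(restored == original);
        return 0;
    }

One caveat worth noting: going through string() uses the native narrow encoding, which is usually what is wanted for mount points but is not a lossless choice on every platform.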
100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -404,69 +404,47 @@ deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id) { req.respond(resp); } -} // namespace scord::network::handlers - - -static void -ADM_register_pfs_storage(hg_handle_t h) { - - using admire::pfs_storage; - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_register_pfs_storage_in_t in; - ADM_register_pfs_storage_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); +void +register_pfs_storage(const request& req, const std::string& name, + enum admire::pfs_storage::type type, + const admire::pfs_storage::ctx& ctx) { - const std::string pfs_name{in.name}; - const auto pfs_type = static_cast(in.type); - const pfs_storage::ctx pfs_ctx{in.ctx}; + using scord::network::get_address; + const auto rpc_name = "ADM_"s + __FUNCTION__; const auto rpc_id = remote_procedure::new_id(); + LOGGER_INFO("rpc id: {} name: {} from: {} => " "body: {{name: {}, type: {}, pfs_ctx: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - pfs_name, pfs_type, pfs_ctx); + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + name, type, ctx); admire::error_code ec; - std::uint64_t out_pfs_id = 0; + std::optional pfs_id = 0; auto& pfs_manager = scord::pfs_storage_manager::instance(); - if(const auto pm_result = pfs_manager.create(pfs_type, pfs_name, pfs_ctx); + if(const auto pm_result = pfs_manager.create(type, name, ctx); pm_result.has_value()) { const auto& adhoc_storage_info = pm_result.value(); - out_pfs_id = adhoc_storage_info->pfs_storage().id(); + pfs_id = adhoc_storage_info->pfs_storage().id(); } else { LOGGER_ERROR("rpc id: {} error_msg: \"Error creating pfs_storage: {}\"", rpc_id, pm_result.error()); ec = pm_result.error(); } - out.op_id = rpc_id; - out.retval = ec; - out.id = out_pfs_id; + const auto resp = response_with_id{rpc_id, ec, pfs_id}; LOGGER_INFO("rpc id: {} name: {} to: {} => " - "body: {{retval: {}, id: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ec, out.id); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); + "body: {{retval: {}, pfs_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec, pfs_id); - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); + req.respond(resp); } -DEFINE_MARGO_RPC_HANDLER(ADM_register_pfs_storage); +} // namespace scord::network::handlers + static void ADM_update_pfs_storage(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index ba11ad0e..146850cc 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -60,6 +60,11 @@ remove_adhoc_storage(const request& req, std::uint64_t adhoc_id); void deploy_adhoc_storage(const request& req, std::uint64_t adhoc_id); +void +register_pfs_storage(const request& req, const std::string& name, + enum admire::pfs_storage::type type, + const admire::pfs_storage::ctx& ctx); + } // namespace scord::network::handlers #include @@ -71,9 +76,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_register_pfs_storage -DECLARE_MARGO_RPC_HANDLER(ADM_register_pfs_storage); - /// ADM_update_pfs_storage DECLARE_MARGO_RPC_HANDLER(ADM_update_pfs_storage); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 
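Two reply types now cover all of the rewritten handlers: generic_response, which carries only the operation id and an error code, and response_with_id, which additionally carries an optional identifier (the freshly created pfs_id above, later a tx_id for transfers). The skeleton below condenses the shape those handlers share; the handler name, its parameter, and the success condition are made up for illustration, and only the types come from the patch.

    // illustration: the common skeleton behind the typed thallium-style handlers
    void
    example_register(const scord::network::request& req, std::uint64_t key) {

        const auto rpc_id = remote_procedure::new_id(); // per-call operation id

        admire::error_code ec;
        std::optional<std::uint64_t> new_id;            // only set on success

        if(key != 0) {                 // stand-in for the real *_manager call
            new_id = key;
        } else {
            ec = admire::error_code::other;
        }

        // handlers that create something answer with response_with_id;
        // handlers that only report success or failure use generic_response
        req.respond(response_with_id{rpc_id, ec, new_id});
    }

The handlers assign ec only on their failure paths, so the default-constructed admire::error_code is evidently the success value.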
2cb7a75f..e8891ece 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -196,6 +196,8 @@ main(int argc, char* argv[]) { scord::network::handlers::remove_adhoc_storage); daemon.set_handler("ADM_deploy_adhoc_storage"s, scord::network::handlers::deploy_adhoc_storage); + daemon.set_handler("ADM_register_pfs_storage"s, + scord::network::handlers::register_pfs_storage); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From a36850f4306509a9f263edd3b637d75e1787f98e Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 14:19:47 +0100 Subject: [PATCH 15/23] Rewrite `ADM_update_pfs_storage` RPC --- src/lib/detail/impl.cpp | 57 +++++++++++++++++--------------------- src/lib/detail/impl.hpp | 2 +- src/scord/rpc_handlers.cpp | 50 ++++++++++----------------------- src/scord/rpc_handlers.hpp | 7 +++-- src/scord/scord.cpp | 2 ++ 5 files changed, 47 insertions(+), 71 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 529008b9..aa88b9a2 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -493,48 +493,41 @@ register_pfs_storage(const server& srv, const std::string& name, admire::error_code update_pfs_storage(const server& srv, const pfs_storage& pfs_storage, - const admire::pfs_storage::ctx& pfs_storage_ctx) { + const admire::pfs_storage::ctx& new_ctx) { - (void) srv; - (void) pfs_storage; - (void) pfs_storage_ctx; - - return admire::error_code::snafu; - -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + scord::network::client rpc_client{srv.protocol()}; const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{pfs_storage_id: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), pfs_storage.id()); + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_ctx = api::convert(pfs_storage_ctx); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{pfs_id: {}, new_ctx: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), pfs_storage.id(), + new_ctx); - ADM_update_pfs_storage_in_t in{rpc_ctx.get(), pfs_storage.id()}; - ADM_update_pfs_storage_out_t out; + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, pfs_storage.id(), new_ctx); + call_rv.has_value()) { - const auto rpc = endp.call("ADM_update_pfs_storage", &in, &out); + const scord::network::generic_response resp{call_rv.value()}; - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return rv; - } + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, - out.op_id); + return resp.error_code(); + } + } - return admire::error_code::success; -#endif + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } admire::error_code diff --git a/src/lib/detail/impl.hpp b/src/lib/detail/impl.hpp 
index 75b42dc3..bb24d14e 100644 --- a/src/lib/detail/impl.hpp +++ b/src/lib/detail/impl.hpp @@ -67,7 +67,7 @@ register_pfs_storage(const server& srv, const std::string& name, admire::error_code update_pfs_storage(const server& srv, const pfs_storage& pfs_storage, - const admire::pfs_storage::ctx& pfs_storage_ctx); + const admire::pfs_storage::ctx& new_ctx); admire::error_code remove_pfs_storage(const server& srv, const pfs_storage& pfs_storage); diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 552d2a15..323ed20a 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -443,60 +443,40 @@ register_pfs_storage(const request& req, const std::string& name, req.respond(resp); } -} // namespace scord::network::handlers - - -static void -ADM_update_pfs_storage(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_update_pfs_storage_in_t in; - ADM_update_pfs_storage_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); +void +update_pfs_storage(const request& req, std::uint64_t pfs_id, + const admire::pfs_storage::ctx& new_ctx) { - const admire::pfs_storage::ctx pfs_storage_ctx(in.pfs_storage_ctx); - const std::uint64_t server_id(in.server_id); + using scord::network::get_address; + const auto rpc_name = "ADM_"s + __FUNCTION__; const auto rpc_id = remote_procedure::new_id(); + LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{pfs_storage_id: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - server_id); + "body: {{pfs_id: {}, new_ctx: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + pfs_id, new_ctx); auto& pfs_manager = scord::pfs_storage_manager::instance(); - const auto ec = pfs_manager.update(server_id, pfs_storage_ctx); + const auto ec = pfs_manager.update(pfs_id, new_ctx); if(!ec) { LOGGER_ERROR("rpc id: {} error_msg: \"Error updating pfs_storage: {}\"", rpc_id, ec); } - out.op_id = rpc_id; - out.retval = ec; + const auto resp = generic_response{rpc_id, ec}; LOGGER_INFO("rpc id: {} name: {} to: {} => " "body: {{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), ec); - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); + req.respond(resp); } -DEFINE_MARGO_RPC_HANDLER(ADM_update_pfs_storage); +} // namespace scord::network::handlers + static void ADM_remove_pfs_storage(hg_handle_t h) { diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 146850cc..7383b906 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -65,6 +65,10 @@ register_pfs_storage(const request& req, const std::string& name, enum admire::pfs_storage::type type, const admire::pfs_storage::ctx& ctx); +void +update_pfs_storage(const request& req, std::uint64_t pfs_id, + const admire::pfs_storage::ctx& new_ctx); + } // namespace scord::network::handlers #include @@ -76,9 +80,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_update_pfs_storage -DECLARE_MARGO_RPC_HANDLER(ADM_update_pfs_storage); - /// ADM_remove_pfs_storage DECLARE_MARGO_RPC_HANDLER(ADM_remove_pfs_storage); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index e8891ece..c05ec5a9 
100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -198,6 +198,8 @@ main(int argc, char* argv[]) { scord::network::handlers::deploy_adhoc_storage); daemon.set_handler("ADM_register_pfs_storage"s, scord::network::handlers::register_pfs_storage); + daemon.set_handler("ADM_update_pfs_storage"s, + scord::network::handlers::update_pfs_storage); #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From 9b6f126dcac2d20d9b2b1a053415ce8d942947d6 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 14:53:14 +0100 Subject: [PATCH 16/23] scord: Simplify RPC registration --- src/scord/scord.cpp | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index c05ec5a9..ed4fc6ca 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -181,25 +181,22 @@ main(int argc, char* argv[]) { try { scord::network::server daemon(cfg); - daemon.set_handler("ADM_ping"s, scord::network::handlers::ping); - daemon.set_handler("ADM_register_job"s, - scord::network::handlers::register_job); - daemon.set_handler("ADM_update_job"s, - scord::network::handlers::update_job); - daemon.set_handler("ADM_remove_job"s, - scord::network::handlers::remove_job); - daemon.set_handler("ADM_register_adhoc_storage"s, - scord::network::handlers::register_adhoc_storage); - daemon.set_handler("ADM_update_adhoc_storage"s, - scord::network::handlers::update_adhoc_storage); - daemon.set_handler("ADM_remove_adhoc_storage"s, - scord::network::handlers::remove_adhoc_storage); - daemon.set_handler("ADM_deploy_adhoc_storage"s, - scord::network::handlers::deploy_adhoc_storage); - daemon.set_handler("ADM_register_pfs_storage"s, - scord::network::handlers::register_pfs_storage); - daemon.set_handler("ADM_update_pfs_storage"s, - scord::network::handlers::update_pfs_storage); + // convenience macro to ensure the names of an RPC and its handler + // always match +#define EXPAND(rpc_name) "ADM_" #rpc_name##s, scord::network::handlers::rpc_name + + daemon.set_handler(EXPAND(ping)); + daemon.set_handler(EXPAND(register_job)); + daemon.set_handler(EXPAND(update_job)); + daemon.set_handler(EXPAND(remove_job)); + daemon.set_handler(EXPAND(register_adhoc_storage)); + daemon.set_handler(EXPAND(update_adhoc_storage)); + daemon.set_handler(EXPAND(remove_adhoc_storage)); + daemon.set_handler(EXPAND(deploy_adhoc_storage)); + daemon.set_handler(EXPAND(register_pfs_storage)); + daemon.set_handler(EXPAND(update_pfs_storage)); + +#undef EXPAND #if 0 const auto rpc_registration_cb = [](auto&& ctx) { -- GitLab From 9b32e40ecb1d44fa679255729af4dd8da9a9f2cc Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 14:53:46 +0100 Subject: [PATCH 17/23] scord: Rewrite `ADM_remove_pfs_storage` RPC --- src/lib/detail/impl.cpp | 51 +++++++++++++++++--------------------- src/scord/rpc_handlers.cpp | 48 +++++++++++------------------------ src/scord/rpc_handlers.hpp | 6 ++--- src/scord/scord.cpp | 1 + 4 files changed, 41 insertions(+), 65 deletions(-) diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index aa88b9a2..38364738 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -533,43 +533,38 @@ update_pfs_storage(const server& srv, const pfs_storage& pfs_storage, admire::error_code remove_pfs_storage(const server& srv, const pfs_storage& pfs_storage) { - (void) srv; - (void) pfs_storage; + scord::network::client rpc_client{srv.protocol()}; - return admire::error_code::snafu; + const auto rpc_id = 
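The EXPAND macro introduced in the registration hunk above leans on two preprocessor steps: # stringizes the handler name, and ## pastes the s literal suffix onto the resulting string literal, after which ordinary literal concatenation with "ADM_" produces a std::string (the surrounding code already uses the std::literals suffixes). Roughly how a single registration expands, worked through for ping:

    daemon.set_handler(EXPAND(ping));
    //   #rpc_name              ->  "ping"
    //   "ping" ## s            ->  "ping"s       (user-defined-literal suffix)
    //   adjacent-literal join  ->  "ADM_" "ping"s  ==  "ADM_ping"s
    // i.e. the line above is equivalent to the pre-macro form:
    daemon.set_handler("ADM_ping"s, scord::network::handlers::ping);

Keeping the RPC string and the handler identifier in a single macro argument is what guarantees the two can no longer drift apart.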
::api::remote_procedure::new_id(); -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{pfs_id: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), pfs_storage.id()); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{pfs_storage_id: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), pfs_storage.id()); + if(const auto& call_rv = + endp.call("ADM_"s + __FUNCTION__, pfs_storage.id()); + call_rv.has_value()) { - ADM_remove_pfs_storage_in_t in{pfs_storage.id()}; - ADM_remove_pfs_storage_out_t out; + const scord::network::generic_response resp{call_rv.value()}; - const auto rpc = endp.call("ADM_remove_pfs_storage", &in, &out); + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.op_id()); - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return rv; + return resp.error_code(); + } } - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, - out.op_id); - - return admire::error_code::success; -#endif + LOGGER_ERROR("rpc call failed"); + return admire::error_code::other; } admire::error_code diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 323ed20a..87425fe6 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -475,58 +475,38 @@ update_pfs_storage(const request& req, std::uint64_t pfs_id, req.respond(resp); } -} // namespace scord::network::handlers - - -static void -ADM_remove_pfs_storage(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_remove_pfs_storage_in_t in; - ADM_remove_pfs_storage_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); +void +remove_pfs_storage(const request& req, std::uint64_t pfs_id) { - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); + using scord::network::get_address; + const auto rpc_name = "ADM_"s + __FUNCTION__; const auto rpc_id = remote_procedure::new_id(); - LOGGER_INFO("rpc id: {} name: {} from: {} => " - "body: {{pfs_storage_id: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - in.server_id); + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{pfs_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + pfs_id); auto& pfs_manager = scord::pfs_storage_manager::instance(); - admire::error_code ec = pfs_manager.remove(in.server_id); + admire::error_code ec = pfs_manager.remove(pfs_id); if(!ec) { LOGGER_ERROR("rpc id: {} error_msg: \"Error removing pfs storage: {}\"", - rpc_id, in.server_id); + rpc_id, pfs_id); } - out.op_id = rpc_id; - out.retval = ec; + const auto resp = generic_response{rpc_id, ec}; LOGGER_INFO("rpc id: {} name: {} to: {} <= " "body: 
{{retval: {}}}", - rpc_id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), ec); - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); + req.respond(resp); } -DEFINE_MARGO_RPC_HANDLER(ADM_remove_pfs_storage); +} // namespace scord::network::handlers /** * Specifes the origin location in a storage tier where input is located, as diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index 7383b906..cbfa3632 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -69,6 +69,9 @@ void update_pfs_storage(const request& req, std::uint64_t pfs_id, const admire::pfs_storage::ctx& new_ctx); +void +remove_pfs_storage(const request& req, std::uint64_t pfs_id); + } // namespace scord::network::handlers #include @@ -80,9 +83,6 @@ extern "C" { // FIXME: cannot be in a namespace due to Margo limitations // namespace scord::network::rpc { -/// ADM_remove_pfs_storage -DECLARE_MARGO_RPC_HANDLER(ADM_remove_pfs_storage); - /// ADM_input DECLARE_MARGO_RPC_HANDLER(ADM_input); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index ed4fc6ca..f21f4850 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -195,6 +195,7 @@ main(int argc, char* argv[]) { daemon.set_handler(EXPAND(deploy_adhoc_storage)); daemon.set_handler(EXPAND(register_pfs_storage)); daemon.set_handler(EXPAND(update_pfs_storage)); + daemon.set_handler(EXPAND(remove_pfs_storage)); #undef EXPAND -- GitLab From 57dd9eb6494755636d553cea966cd95c3c1992fc Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 16:07:39 +0100 Subject: [PATCH 18/23] scord: Rewrite `ADM_transfer_datasets` RPC --- src/common/api/admire_types.hpp | 21 +++++ src/common/api/types.cpp | 129 +++++++++++++++++++++++++++++++ src/common/net/serialization.hpp | 1 + src/lib/detail/impl.cpp | 80 ++++++++----------- src/scord/rpc_handlers.cpp | 115 +++++++++------------------ src/scord/rpc_handlers.hpp | 10 ++- src/scord/scord.cpp | 1 + 7 files changed, 228 insertions(+), 129 deletions(-) diff --git a/src/common/api/admire_types.hpp b/src/common/api/admire_types.hpp index 664d5f83..9719690f 100644 --- a/src/common/api/admire_types.hpp +++ b/src/common/api/admire_types.hpp @@ -224,6 +224,7 @@ struct transfer { n_to_n = ADM_MAPPING_N_TO_N }; + transfer(); explicit transfer(transfer_id id); explicit transfer(ADM_transfer_t transfer); @@ -239,6 +240,12 @@ struct transfer { transfer_id id() const; + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; @@ -260,6 +267,7 @@ enum class scope : std::underlying_type::type { struct entity { + entity(); template entity(admire::qos::scope s, T&& data); explicit entity(ADM_qos_entity_t entity); @@ -280,6 +288,12 @@ struct entity { T data() const; + // The implementation for this must be deferred until + // after the declaration of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; @@ -287,6 +301,7 @@ private: struct limit { + limit(); limit(admire::qos::subclass cls, uint64_t value); limit(admire::qos::subclass cls, uint64_t value, const admire::qos::entity& e); @@ -310,6 +325,12 @@ struct limit { uint64_t value() const; + // The implementation for this must be deferred until + // after the declaration 
of the PIMPL class + template + void + serialize(Archive& ar); + private: class impl; std::unique_ptr m_pimpl; diff --git a/src/common/api/types.cpp b/src/common/api/types.cpp index dc83c93d..019ea7bb 100644 --- a/src/common/api/types.cpp +++ b/src/common/api/types.cpp @@ -1308,6 +1308,7 @@ job::serialize>( class transfer::impl { public: + impl() = default; explicit impl(transfer_id id) : m_id(id) {} impl(const impl& rhs) = default; @@ -1322,10 +1323,24 @@ public: return m_id; } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_id)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_id)); + } + private: transfer_id m_id; }; +transfer::transfer() = default; + transfer::transfer(transfer_id id) : m_pimpl(std::make_unique(id)) {} @@ -1353,6 +1368,32 @@ transfer::id() const { return m_pimpl->id(); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +transfer::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +transfer::impl::save( + scord::network::serialization::output_archive&) const; + +template void +transfer::impl::load( + scord::network::serialization::input_archive&); + +template void +transfer::serialize( + scord::network::serialization::output_archive&); + +template void +transfer::serialize( + scord::network::serialization::input_archive&); + class dataset::impl { public: impl() = default; @@ -1965,6 +2006,8 @@ namespace qos { class entity::impl { public: + impl() = default; + template impl(const admire::qos::scope& s, T&& data) : m_scope(s), m_data(data) {} @@ -1990,6 +2033,20 @@ public: return std::get(m_data); } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_scope)); + ar(SCORD_SERIALIZATION_NVP(m_data)); + } + + template + void + save(Archive& ar) const { + ar(SCORD_SERIALIZATION_NVP(m_scope)); + ar(SCORD_SERIALIZATION_NVP(m_data)); + } + private: static std::variant init_helper(ADM_qos_entity_t entity) { @@ -2014,6 +2071,8 @@ private: std::variant m_data; }; +entity::entity() = default; + template entity::entity(admire::qos::scope s, T&& data) : m_pimpl(std::make_unique(s, std::forward(data))) {} @@ -2066,10 +2125,36 @@ entity::data() const { return m_pimpl->data(); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +entity::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +entity::impl::save( + scord::network::serialization::output_archive&) const; + +template void +entity::impl::load( + scord::network::serialization::input_archive&); + +template void +entity::serialize( + scord::network::serialization::output_archive&); + +template void +entity::serialize( + scord::network::serialization::input_archive&); class limit::impl { public: + impl() = default; impl(admire::qos::subclass cls, uint64_t value, admire::qos::entity e) : m_subclass(cls), m_value(value), m_entity(std::move(e)) {} @@ -2104,12 +2189,30 @@ public: return m_value; } + template + void + load(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_subclass)); + ar(SCORD_SERIALIZATION_NVP(m_value)); + ar(SCORD_SERIALIZATION_NVP(m_entity)); + } + + template + void + save(Archive& ar) const { + 
ar(SCORD_SERIALIZATION_NVP(m_subclass)); + ar(SCORD_SERIALIZATION_NVP(m_value)); + ar(SCORD_SERIALIZATION_NVP(m_entity)); + } + private: admire::qos::subclass m_subclass; uint64_t m_value; std::optional m_entity; }; +limit::limit() = default; + limit::limit(admire::qos::subclass cls, uint64_t value) : m_pimpl(std::make_unique(cls, value)) {} @@ -2150,6 +2253,32 @@ limit::value() const { return m_pimpl->value(); } +// since the PIMPL class is fully defined at this point, we can now +// define the serialization function +template +inline void +limit::serialize(Archive& ar) { + ar(SCORD_SERIALIZATION_NVP(m_pimpl)); +} + +// we must also explicitly instantiate our template functions for +// serialization in the desired archives +template void +limit::impl::save( + scord::network::serialization::output_archive&) const; + +template void +limit::impl::load( + scord::network::serialization::input_archive&); + +template void +limit::serialize( + scord::network::serialization::output_archive&); + +template void +limit::serialize( + scord::network::serialization::input_archive&); + } // namespace qos } // namespace admire diff --git a/src/common/net/serialization.hpp b/src/common/net/serialization.hpp index 7eefdd67..fbca80d6 100644 --- a/src/common/net/serialization.hpp +++ b/src/common/net/serialization.hpp @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 38364738..8801bd54 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -611,58 +611,44 @@ transfer_datasets(const server& srv, const job& job, const std::vector& limits, transfer::mapping mapping) { - (void) srv; - (void) job; - (void) sources; - (void) targets; - (void) limits; - (void) mapping; + scord::network::client rpc_client{srv.protocol()}; - return tl::make_unexpected(admire::error_code::snafu); + const auto rpc_id = ::api::remote_procedure::new_id(); -#if 0 - scord::network::rpc_client rpc_client{srv.protocol(), rpc_registration_cb}; + if(const auto& lookup_rv = rpc_client.lookup(srv.address()); + lookup_rv.has_value()) { + const auto& endp = lookup_rv.value(); - const auto rpc_id = ::api::remote_procedure::new_id(); - auto endp = rpc_client.lookup(srv.address()); - - LOGGER_INFO( - "rpc id: {} name: {} from: {} => " - "body: {{job: {}, sources: {}, targets: {}, limits: {}, mapping: {}}}", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc_client.self_address()), job, sources, targets, - limits, mapping); - - const auto rpc_job = api::convert(job); - const auto rpc_sources = api::convert(sources); - const auto rpc_targets = api::convert(targets); - const auto rpc_qos_limits = api::convert(limits); - - ADM_transfer_datasets_in_t in{rpc_job.get(), rpc_sources.get(), - rpc_targets.get(), rpc_qos_limits.get(), - static_cast(mapping)}; - ADM_transfer_datasets_out_t out; - - [[maybe_unused]] const auto rpc = - endp.call("ADM_transfer_datasets", &in, &out); - - if(const auto rv = admire::error_code{out.retval}; !rv) { - LOGGER_ERROR("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), rv, out.op_id); - return tl::make_unexpected(rv); - } + LOGGER_INFO("rpc id: {} name: {} from: {} => " + "body: {{job_id: {}, sources: {}, targets: {}, limits: {}, " + "mapping: {}}}", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(rpc_client.self_address()), job.id(), sources, + targets, limits, mapping); - const admire::transfer tx 
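A recurring detail in these serialization hunks is the newly added default constructors (transfer(), qos::entity(), qos::limit(), and their impl() = default counterparts): cereal must be able to default-construct both the outer object and the impl behind its unique_ptr before it can load archived state into them. The standalone sketch below shows that requirement in isolation; the pimpl is flattened to a namespace-scope struct and all names are hypothetical.

    // standalone sketch: loading a pimpl'd type with cereal needs default ctors
    #include <cassert>
    #include <cstdint>
    #include <memory>
    #include <sstream>
    #include <cereal/archives/binary.hpp>
    #include <cereal/types/memory.hpp> // std::unique_ptr support

    struct widget_impl {               // stands in for e.g. transfer::impl
        std::uint64_t m_id{};

        template <class Archive>
        void
        serialize(Archive& ar) {
            ar(m_id);
        }
    };

    class widget {                     // stands in for e.g. admire::transfer
        std::unique_ptr<widget_impl> m_pimpl;

    public:
        widget() = default;            // required so cereal can load into it
        explicit widget(std::uint64_t id)
            : m_pimpl(std::make_unique<widget_impl>()) {
            m_pimpl->m_id = id;
        }

        std::uint64_t
        id() const {
            return m_pimpl->m_id;
        }

        template <class Archive>
        void
        serialize(Archive& ar) {
            ar(m_pimpl);               // cereal allocates the impl on load
        }
    };

    int main() {
        std::stringstream ss;
        widget original(42);
        {
            cereal::BinaryOutputArchive oar{ss};
            oar(original);
        }
        widget restored;               // default-constructed, then filled in
        cereal::BinaryInputArchive iar{ss};
        iar(restored);
        assert(restored.id() == 42);
        return 0;
    }

The explicit template instantiations that follow each serialize() definition in types.cpp serve a related purpose: because the function bodies live in the .cpp file, the input_archive and output_archive instantiations have to be forced there, or the linker would never see them.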
= api::convert(out.tx); + if(const auto& call_rv = endp.call("ADM_"s + __FUNCTION__, job.id(), + sources, targets, limits, mapping); + call_rv.has_value()) { - LOGGER_INFO("rpc id: {} name: {} from: {} <= " - "body: {{retval: {}, transfer: {}}} [op_id: {}]", - rpc_id, std::quoted("ADM_"s + __FUNCTION__), - std::quoted(rpc.origin()), admire::error_code::success, tx, - out.op_id); - return tx; -#endif + const scord::network::response_with_id resp{call_rv.value()}; + + LOGGER_EVAL(resp.error_code(), INFO, ERROR, + "rpc id: {} name: {} from: {} <= " + "body: {{retval: {}, tx_id: {}}} [op_id: {}]", + rpc_id, std::quoted("ADM_"s + __FUNCTION__), + std::quoted(endp.address()), resp.error_code(), + resp.value(), resp.op_id()); + + if(const auto ec = resp.error_code(); !ec) { + return tl::make_unexpected(ec); + } + + return admire::transfer{resp.value()}; + } + } + + LOGGER_ERROR("rpc call failed"); + return tl::make_unexpected(admire::error_code::other); } } // namespace admire::detail diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index 87425fe6..ef11c9bc 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -506,6 +506,42 @@ remove_pfs_storage(const request& req, std::uint64_t pfs_id) { req.respond(resp); } +void +transfer_datasets(const request& req, admire::job_id job_id, + const std::vector& sources, + const std::vector& targets, + const std::vector& limits, + enum admire::transfer::mapping mapping) { + + using scord::network::get_address; + + const auto rpc_name = "ADM_"s + __FUNCTION__; + const auto rpc_id = remote_procedure::new_id(); + + LOGGER_INFO( + "rpc id: {} name: {} from: {} => " + "body: {{job_id: {}, sources: {}, targets: {}, limits: {}, mapping: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + job_id, sources, targets, limits, mapping); + + admire::error_code ec; + + std::optional tx_id; + + // TODO: generate a global ID for the transfer and contact Cargo to + // actually request it + tx_id = 42; + + const auto resp = response_with_id{rpc_id, ec, tx_id}; + + LOGGER_INFO("rpc id: {} name: {} to: {} <= " + "body: {{retval: {}, tx_id: {}}}", + rpc_id, std::quoted(rpc_name), std::quoted(get_address(req)), + ec, tx_id); + + req.respond(resp); +} + } // namespace scord::network::handlers /** @@ -1069,85 +1105,6 @@ ADM_in_transit_ops(hg_handle_t h) { DEFINE_MARGO_RPC_HANDLER(ADM_in_transit_ops) - -/** - * Transfers the dataset identified by the source_name to the storage tier - * defined by destination_name, and apply the provided constraints during the - * transfer. This function returns a handle that can be used to track the - * operation (i.e., get statistics, or status). - * - * @param in.source A source_location identifying the source dataset/s in the - * source storage tier. - * @param in.destination A destination_location identifying the destination - * dataset/s in its desired location in a storage tier. - * @param in.qos_constraints A list of qos_constraints that must be applied to - * the transfer. These may not exceed the global ones set at node, application, - * or resource level (see Section 3.4). - * @param in.distribution A distribution strategy for data (e.g. one-to-one, - * one-to-many, many-to-many) - * @param in.job_id A job_id identifying the originating job. - * @param out.transfer_handle A transfer_handle allowing clients to interact - * with the transfer (e.g. wait for its completion, query its status, cancel it, - * etc. 
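For RPCs that hand a value back, the wrappers above layer a second pattern on top of the lookup-and-call flow: decode the reply as a response_with_id, convert a failed error code into tl::make_unexpected, and otherwise build the domain object from resp.value() (an admire::transfer here, an admire::pfs_storage in the earlier register call). A condensed sketch of just that decoding step follows; the function name and the Reply parameter are hypothetical, and the lookup/call plumbing shown earlier is omitted.

    // sketch: decoding a reply that carries an identifier back to the caller
    template <typename Reply>
    tl::expected<admire::transfer, admire::error_code>
    decode_transfer_reply(const Reply& reply) {
        const scord::network::response_with_id resp{reply};

        // as in the surrounding code, a falsy error_code denotes failure
        if(const auto ec = resp.error_code(); !ec) {
            return tl::make_unexpected(ec);
        }

        // the identifier attached to the reply becomes the caller's handle
        return admire::transfer{resp.value()};
    }

On the server side the matching handler still hard-codes tx_id = 42, as its TODO notes, until the transfer is actually delegated to Cargo.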
- * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_transfer_datasets(hg_handle_t h) { - - using scord::network::utils::get_address; - - [[maybe_unused]] hg_return_t ret; - - ADM_transfer_datasets_in_t in; - ADM_transfer_datasets_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const admire::job job{in.job}; - const std::vector sources = - admire::api::convert(in.sources); - const std::vector targets = - admire::api::convert(in.targets); - const std::vector limits = - admire::api::convert(in.qos_limits); - const auto mapping = static_cast(in.mapping); - - const auto id = remote_procedure::new_id(); - LOGGER_INFO( - "rpc id: {} name: {} from: {} => " - "body: {{job: {}, sources: {}, targets: {}, limits: {}, mapping: {}}}", - id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), job, - sources, targets, limits, mapping); - - admire::error_code ec; - - const auto transfer = admire::transfer{42}; - - out.op_id = id; - out.retval = ec; - out.tx = admire::api::convert(transfer).release(); - - LOGGER_INFO("rpc id: {} name: {} to: {} <= " - "body: {{retval: {}, transfer: {}}}", - id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), ec, - transfer); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_transfer_datasets) - /** * Sets information for the dataset identified by resource_id. * diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index cbfa3632..c5624b26 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -72,6 +72,13 @@ update_pfs_storage(const request& req, std::uint64_t pfs_id, void remove_pfs_storage(const request& req, std::uint64_t pfs_id); +void +transfer_datasets(const request& req, admire::job_id job_id, + const std::vector& sources, + const std::vector& targets, + const std::vector& limits, + enum admire::transfer::mapping mapping); + } // namespace scord::network::handlers #include @@ -121,9 +128,6 @@ DECLARE_MARGO_RPC_HANDLER(ADM_in_situ_ops); DECLARE_MARGO_RPC_HANDLER(ADM_in_transit_ops); -/// ADM_transfer_datasets -DECLARE_MARGO_RPC_HANDLER(ADM_transfer_datasets); - /// ADM_set_dataset_information DECLARE_MARGO_RPC_HANDLER(ADM_set_dataset_information); diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index f21f4850..33217769 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -196,6 +196,7 @@ main(int argc, char* argv[]) { daemon.set_handler(EXPAND(register_pfs_storage)); daemon.set_handler(EXPAND(update_pfs_storage)); daemon.set_handler(EXPAND(remove_pfs_storage)); + daemon.set_handler(EXPAND(transfer_datasets)); #undef EXPAND -- GitLab From ad78f509acc10894e8333bf5c4625bb02a9ca8db Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 16:17:15 +0100 Subject: [PATCH 19/23] Cleanup: Remove unused code and includes --- src/common/api/types.cpp | 5 - src/lib/detail/impl.cpp | 129 ---- src/scord/rpc_handlers.cpp | 1282 ------------------------------------ src/scord/rpc_handlers.hpp | 95 +-- src/scord/scord.cpp | 151 ----- 5 files changed, 1 insertion(+), 1661 deletions(-) diff --git a/src/common/api/types.cpp b/src/common/api/types.cpp index 019ea7bb..56f2a124 100644 --- a/src/common/api/types.cpp +++ b/src/common/api/types.cpp @@ -26,13 +26,8 @@ #include 
#include #include -#include -#include -#include #include #include -#include -#include #include "admire_types.hpp" #include "internal_types.hpp" diff --git a/src/lib/detail/impl.cpp b/src/lib/detail/impl.cpp index 8801bd54..67ab4a70 100644 --- a/src/lib/detail/impl.cpp +++ b/src/lib/detail/impl.cpp @@ -27,140 +27,11 @@ #include #include #include -#include -#include #include #include "impl.hpp" using namespace std::literals; -#if 0 -void -rpc_registration_cb(scord::network::rpc_client* client) { - - REGISTER_RPC(client, "ADM_ping", void, ADM_ping_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_register_job", ADM_register_job_in_t, - ADM_register_job_out_t, NULL, true); - REGISTER_RPC(client, "ADM_update_job", ADM_update_job_in_t, - ADM_update_job_out_t, NULL, true); - REGISTER_RPC(client, "ADM_remove_job", ADM_remove_job_in_t, - ADM_remove_job_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_register_adhoc_storage", - ADM_register_adhoc_storage_in_t, - ADM_register_adhoc_storage_out_t, NULL, true); - REGISTER_RPC(client, "ADM_update_adhoc_storage", - ADM_update_adhoc_storage_in_t, ADM_update_adhoc_storage_out_t, - NULL, true); - REGISTER_RPC(client, "ADM_remove_adhoc_storage", - ADM_remove_adhoc_storage_in_t, ADM_remove_adhoc_storage_out_t, - NULL, true); - - REGISTER_RPC(client, "ADM_deploy_adhoc_storage", - ADM_deploy_adhoc_storage_in_t, ADM_deploy_adhoc_storage_out_t, - NULL, true); - - REGISTER_RPC(client, "ADM_register_pfs_storage", - ADM_register_pfs_storage_in_t, ADM_register_pfs_storage_out_t, - NULL, true); - REGISTER_RPC(client, "ADM_update_pfs_storage", ADM_update_pfs_storage_in_t, - ADM_update_pfs_storage_out_t, NULL, true); - REGISTER_RPC(client, "ADM_remove_pfs_storage", ADM_remove_pfs_storage_in_t, - ADM_remove_pfs_storage_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_input", ADM_input_in_t, ADM_input_out_t, NULL, - true); - - - REGISTER_RPC(client, "ADM_output", ADM_output_in_t, ADM_output_out_t, NULL, - true); - - REGISTER_RPC(client, "ADM_inout", ADM_inout_in_t, ADM_inout_out_t, NULL, - true); - - REGISTER_RPC(client, "ADM_adhoc_context", ADM_adhoc_context_in_t, - ADM_adhoc_context_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_adhoc_context_id", ADM_adhoc_context_id_in_t, - ADM_adhoc_context_id_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_adhoc_nodes", ADM_adhoc_nodes_in_t, - ADM_adhoc_nodes_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_adhoc_walltime", ADM_adhoc_walltime_in_t, - ADM_adhoc_walltime_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_adhoc_access", ADM_adhoc_access_in_t, - ADM_adhoc_access_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_adhoc_distribution", ADM_adhoc_distribution_in_t, - ADM_adhoc_distribution_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_adhoc_background_flush", - ADM_adhoc_background_flush_in_t, - ADM_adhoc_background_flush_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_in_situ_ops", ADM_in_situ_ops_in_t, - ADM_in_situ_ops_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_in_transit_ops", ADM_in_transit_ops_in_t, - ADM_in_transit_ops_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_transfer_datasets", ADM_transfer_datasets_in_t, - ADM_transfer_datasets_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_set_dataset_information", - ADM_set_dataset_information_in_t, - ADM_set_dataset_information_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_set_io_resources", ADM_set_io_resources_in_t, - ADM_set_io_resources_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_get_transfer_priority", - 
ADM_get_transfer_priority_in_t, - ADM_get_transfer_priority_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_set_transfer_priority", - ADM_set_transfer_priority_in_t, - ADM_set_transfer_priority_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_cancel_transfer", ADM_cancel_transfer_in_t, - ADM_cancel_transfer_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_get_pending_transfers", - ADM_get_pending_transfers_in_t, - ADM_get_pending_transfers_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_set_qos_constraints", - ADM_set_qos_constraints_in_t, ADM_set_qos_constraints_out_t, - NULL, true); - - REGISTER_RPC(client, "ADM_get_qos_constraints", - ADM_get_qos_constraints_in_t, ADM_get_qos_constraints_out_t, - NULL, true); - - REGISTER_RPC(client, "ADM_define_data_operation", - ADM_define_data_operation_in_t, - ADM_define_data_operation_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_connect_data_operation", - ADM_connect_data_operation_in_t, - ADM_connect_data_operation_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_finalize_data_operation", - ADM_finalize_data_operation_in_t, - ADM_finalize_data_operation_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_link_transfer_to_data_operation", - ADM_link_transfer_to_data_operation_in_t, - ADM_link_transfer_to_data_operation_out_t, NULL, true); - - REGISTER_RPC(client, "ADM_get_statistics", ADM_get_statistics_in_t, - ADM_get_statistics_out_t, NULL, true); -} -#endif - namespace api { struct remote_procedure { diff --git a/src/scord/rpc_handlers.cpp b/src/scord/rpc_handlers.cpp index ef11c9bc..b128cb26 100644 --- a/src/scord/rpc_handlers.cpp +++ b/src/scord/rpc_handlers.cpp @@ -23,11 +23,6 @@ *****************************************************************************/ #include -#include -#include -#include -#include -#include #include #include "rpc_handlers.hpp" #include "job_manager.hpp" @@ -543,1280 +538,3 @@ transfer_datasets(const request& req, admire::job_id job_id, } } // namespace scord::network::handlers - -/** - * Specifes the origin location in a storage tier where input is located, as - * well as the target location where it should be placed in a different storage - * tier. - * - * @param in.origin An origin location for the source dataset. - * @param in.target A target location for the destination dataset. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_input(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_input_in_t in; - ADM_input_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.origin == nullptr) { - LOGGER_ERROR("ADM_input(): invalid origin (nullptr)"); - } else if(in.target == nullptr) { - LOGGER_ERROR("ADM_input(): invalid target (nullptr)"); - } else { - LOGGER_INFO("ADM_input({}, {})", in.origin, in.target); - out.ret = 0; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_input) - -/** - * Specifies the origin location in a storage tier where output is located, as - * well as the target location where it should be placed in a different storage - * tier. - * - * @param in.origin An origin location for the source dataset. - * @param in.target A target location for the destination dataset. 
- * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_output(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_output_in_t in; - ADM_output_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.origin == nullptr) { - LOGGER_ERROR("ADM_output(): invalid origin (nullptr)"); - } else if(in.target == nullptr) { - LOGGER_ERROR("ADM_output(): invalid target (nullptr)"); - } else { - LOGGER_INFO("ADM_output({}, {})", in.origin, in.target); - out.ret = 0; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_output) - -/** - * Specifies both the input and output locations in a storage tier. This - * combines both ADM_input and ADM_output for user convenience: the input data - * provided by origin is overwritten by the output data generated at target. - * - * @param in.origin An origin location for the source dataset. - * @param in.target A target location for the destination dataset. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_inout(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_inout_in_t in; - ADM_inout_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.origin == nullptr) { - LOGGER_ERROR("ADM_inout(): invalid origin (nullptr)"); - } else if(in.target == nullptr) { - LOGGER_ERROR("ADM_inout(): invalid target (nullptr)"); - } else { - LOGGER_INFO("ADM_inout({}, {})", in.origin, in.target); - out.ret = 0; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_inout) - -/** - * Specifies the execution_mode an Ad hoc Storage System should use. Valid - * options: in_job:shared (run while sharing the application’s compute nodes), - * in_job:dedicated (run using a subset of the application’s compute nodes), - * separate:new (ask the system to allocate a separate job with separate runtime - * and number of nodes) and separate:existing (ask the system to reuse an - * already running Ad hoc Storage System instance). The number of nodes assigned - * for the Ad hoc Storage System must be specified with ADM_adhoc_nodes. In the - * separate:new execution_mode, the lifetime of the Ad hoc Storage System will - * be controlled with ADM_adhoc_walltime. In the separate:existing - * execution_mode, a valid context ID must be provided with - * ADM_adhoc_context_id. - * - * @param in.context A valid execution_mode describing how the Ad hoc Storage - * System should behave. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - * @return out.adhoc_context_id A number that identifies the context. 
- */ -static void -ADM_adhoc_context(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_context_in_t in; - ADM_adhoc_context_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const std::string ctx(in.context); - - out.ret = -1; - out.adhoc_context = -1; - - if(in.context == nullptr) { - LOGGER_ERROR("ADM_adhoc_context(): invalid context (nullptr)"); - } else { - LOGGER_INFO("ADM_adhoc_context({})", in.context); - - if(ctx == "in_job:shared" || ctx == "in_job:dedicated" || - ctx == "separate:new" || ctx == "separate:existing") { - LOGGER_INFO("ADM_adhoc_context value is acceptable ({})", - in.context); - out.ret = 0; - out.adhoc_context = rand(); - } else { - LOGGER_ERROR( - "ADM_adhoc_context is not valid. Please use: in_job:shared, in_job:dedicated, separate:new or separate:existing"); - } - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_context) - -/** - * Specifies an existing Ad hoc Storage System to use via its ID. - * - * @param in.context_id A valid context_id for a separate instance of an Ad hoc - * Storage System. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_adhoc_context_id(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_context_id_in_t in; - ADM_adhoc_context_id_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - - out.ret = -1; - - if(in.context_id < 0) { - LOGGER_ERROR("ADM_adhoc_context_id(): invalid context_id (< 0)"); - } else { - LOGGER_INFO("ADM_adhoc_context_id({})", in.context_id); - out.ret = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_context_id) - -/** - * Specifies the number of nodes for the Ad hoc Storage System. If the - * ADM_adhoc_execution_mode is shared, the number cannot exceed the number of - * allocated nodes within the compute job. If the ADM_adhoc_execution_mode is - * dedicated, the number of nodes is not restricted. - * - * @param in.number_of_nodes The desired number_of_nodes. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_adhoc_nodes(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_nodes_in_t in; - ADM_adhoc_nodes_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.nodes <= 0) { - LOGGER_ERROR("ADM_adhoc_nodes(): invalid n_nodes (<= 0)"); - } else { - LOGGER_INFO("ADM_adhoc_nodes({})", in.nodes); - out.ret = 0; - } - - - /*Specifies the number of nodes for the Ad hoc Storage System. If the - ADM_adhoc_execution_mode is shared, the number cannot exceed the number of - allocated nodes within the compute job. If the ADM_adhoc_execution_mode is - dedicated, the number of nodes is not restricted. Should this be checked - now? 
*/ - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_nodes) - -/** - * Specifies for how long the ad hoc storage system should run before should - * down. Only relevant in the context of the ADM_adhoc_context function. - * - * @param in.walltime The desired walltime in minutes. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_adhoc_walltime(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_walltime_in_t in; - ADM_adhoc_walltime_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.walltime < 0) { - LOGGER_ERROR("ADM_adhoc_walltime(): invalid walltime (< 0)"); - } else { - LOGGER_INFO("ADM_adhoc_walltime({})", in.walltime); - out.ret = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_walltime) - -/** - * Specifies access to the ad hoc storage system: write-only, read-only, - * read-write. Cannot be used when using an existing Ad hoc Storage System - * instance. - * - * @param in.access The desired access method - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_adhoc_access(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_access_in_t in; - ADM_adhoc_access_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - const std::string acc(in.access); - - out.ret = -1; - - if(in.access == nullptr) { - LOGGER_ERROR("ADM_adhoc_access(): invalid access (nullptr)"); - } else { - LOGGER_INFO("ADM_adhoc_access({})", in.access); - - if((acc == "write-only") || (acc == "read-only") || - (acc == "read-write")) { - out.ret = 0; - LOGGER_INFO("ADM_adhoc_access value is acceptable ({})", in.access); - } else { - LOGGER_ERROR( - "ADM_adhoc_access is not valid. Please use: write-only, read-only or read-write"); - } - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_access) - - -/** - * Specifies the data distribution within the ad hoc storage system, e.g., - * wide-striping, local, local-data-global-metadata. - * - * @param in.data_distribution The desired data distribution - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. 
- */ -static void -ADM_adhoc_distribution(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_distribution_in_t in; - ADM_adhoc_distribution_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.data_distribution == nullptr) { - LOGGER_ERROR( - "ADM_adhoc_distribution(): invalid data_distribution (nullptr)"); - } else { - LOGGER_INFO("ADM_adhoc_distribution({})", in.data_distribution); - out.ret = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_distribution) - -/** - * Specifies if data in the output location should be moved to the shared - * backend storage system in the background (default false). - * - * @param in.b_flush A boolean enabling or disabling the option. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_adhoc_background_flush(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_adhoc_background_flush_in_t in; - ADM_adhoc_background_flush_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - LOGGER_INFO("ADM_adhoc_background_flush({})", in.b_flush); - out.ret = 0; - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_adhoc_background_flush) - -/** - * In situ data operations specified in a given configuration file. - * - * @param in.in_situ A path to the configuration file. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_in_situ_ops(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_in_situ_ops_in_t in; - ADM_in_situ_ops_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.in_situ == nullptr) { - LOGGER_ERROR("ADM_in_situ_ops(): invalid in_situ_ops (nullptr)"); - } else { - LOGGER_INFO("ADM_in_situ_ops({})", in.in_situ); - out.ret = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_in_situ_ops) - -/** - * In transit data operations specified in a given configuration file. - * - * @param in.in_transit A path to the configuration file. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. 
- */ -static void -ADM_in_transit_ops(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_in_transit_ops_in_t in; - ADM_in_transit_ops_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - - if(in.in_transit == nullptr) { - LOGGER_ERROR("ADM_in_transit_ops(): invalid in_transit (nullptr)"); - } else { - LOGGER_INFO("ADM_in_transit_ops({})", in.in_transit); - out.ret = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_in_transit_ops) - -/** - * Sets information for the dataset identified by resource_id. - * - * @param in.resource_id A resource_id identifying the dataset of interest. - * @param in.info An opaque inf o argument containing information about the - * dataset (e.g. its lifespan, access methods, intended usage, etc.). - * @param in.job_id A job_id identifying the originating job. - * @param out.status A status code determining whether the operation was - * successful. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_set_dataset_information(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_set_dataset_information_in_t in; - ADM_set_dataset_information_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.resource_id < 0) { - LOGGER_ERROR( - "ADM_set_dataset_information(): invalid resource_id (< 0)"); - } else if(in.info == nullptr) { - LOGGER_ERROR("ADM_set_dataset_information(): invalid info (nullptr)"); - } else if(in.job_id < 0) { - LOGGER_ERROR("ADM_set_dataset_information(): invalid job_id (< 0)"); - } else { - LOGGER_INFO("ADM_set_dataset_information({},{},{})", in.resource_id, - in.info, in.job_id); - out.ret = 0; - out.status = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_set_dataset_information) - -/** - * Changes the I/O resources used by a storage tier, typically an Ad hoc Storage - * System. - * - * @param in.tier_id A tier_id specifying the target storage tier. - * @param in.resources An opaque resources argument containing information about - * the I/O resources to modify (e.g. number of I/O nodes.). - * @param in.job_id A job_id identifying the originating job. - * @param out.status A status code determining whether the operation was - * successful. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. 
- */ -static void -ADM_set_io_resources(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_set_io_resources_in_t in; - ADM_set_io_resources_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.tier_id < 0) { - LOGGER_ERROR("ADM_set_io_resources(): invalid tier_id (nullptr)"); - } else if(in.resources == nullptr) { - LOGGER_ERROR("ADM_set_io_resources(): invalid resources (nullptr)"); - } else if(in.job_id < 0) { - LOGGER_ERROR("ADM_set_io_resources(): invalid job_id (< 0)"); - } else { - LOGGER_INFO("ADM_set_io_resources({},{},{})", in.tier_id, in.resources, - in.job_id); - out.ret = 0; - out.status = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_set_io_resources) - - -/** - * Returns the priority of the pending transfer identified by transfer_id. - * - * @param in.transfer_id A tier_id specifying the target storage tier. - * @param out.priority The priority of the pending transfer or an error code if - * it didn’t exist or is no longer pending. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_get_transfer_priority(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_get_transfer_priority_in_t in; - ADM_get_transfer_priority_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.priority = -1; - - if(in.transfer_id < 0) { - LOGGER_ERROR( - "ADM_get_transfer_priority(): invalid transfer_id (nullptr)"); - } else { - LOGGER_INFO("ADM_get_transfer_priority({})", in.transfer_id); - out.ret = 0; - out.priority = 0; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_get_transfer_priority) - -/** - * Moves the operation identified by transfer_id up or down by n positions in - * its scheduling queue. - * - * @param in.transfer_id A transf er_id identifying a pending transfer. - * @param in.n_positions A positive or negative number n for the number of - * positions the transfer should go up or down in its scheduling queue. - * @param out.status A status code indicating whether the operation was - * successful. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. 
- */ -static void -ADM_set_transfer_priority(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_set_transfer_priority_in_t in; - ADM_set_transfer_priority_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.transfer_id < 0) { - LOGGER_ERROR( - "ADM_set_transfer_priority(): invalid transfer_id (nullptr)"); - } else { - LOGGER_INFO("ADM_set_transfer_priority({}, {})", in.transfer_id, - in.n_positions); - out.ret = 0; - out.status = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_set_transfer_priority) - -/** - * Cancels the pending transfer identified by transfer_id. - * - * @param in.transfer_id A transfer_id identifying a pending transfer. - * @param out.status A status code indicating whether the operation was - * successful. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_cancel_transfer(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_cancel_transfer_in_t in; - ADM_cancel_transfer_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.transfer_id < 0) { - LOGGER_ERROR("ADM_cancel_transfer(): invalid transfer_id (< 0)"); - } else { - LOGGER_INFO("ADM_cancel_transfer({})", in.transfer_id); - out.ret = 0; - out.status = 0; - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_cancel_transfer) - -/** - * Returns a list of pending transfers. Each operation will include a transf - * er_id as well as information about the involved resources and tiers. - * - * @param out.pending_transfers A list of pending_transfers. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_get_pending_transfers(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_get_pending_transfers_in_t in; - ADM_get_pending_transfers_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = 0; - out.pending_transfers = "list"; - - LOGGER_INFO("ADM_get_pending_transfers()"); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_get_pending_transfers) - -/** - * Registers a QoS constraint defined by class, scope, and value for the element - * identified by id. - * - * @param in.scope The scope it should be applied to: dataset, node, or job. - * @param in.qos_class A QoS class (e.g. "badwidth", "iops", etc.). - * @param in.element_id A valid id for the element that should be constrained, - * i.e. a resource ID, a node hostname, or a Job ID. - * @param in.class_value An appropriate value for the selected class. - * @param out.status A status code indicating whether the operation was - * successful. 
- * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_set_qos_constraints(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_set_qos_constraints_in_t in; - ADM_set_qos_constraints_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.scope == nullptr) { - LOGGER_ERROR("ADM_set_qos_constraints(): invalid scope (nullptr)"); - } else if(in.qos_class == nullptr) { - LOGGER_ERROR("ADM_set_qos_constraints(): invalid qos_class (nullptr)"); - } else if(in.element_id < 0) { - LOGGER_ERROR("ADM_set_qos_constraints(): invalid element_id (< 0)"); - } else if(in.class_value == nullptr) { - LOGGER_ERROR( - "ADM_set_qos_constraints(): invalid class_value (nullptr)"); - } else { - LOGGER_INFO("ADM_set_qos_constraints({}, {}, {}, {})", in.scope, - in.qos_class, in.element_id, in.class_value); - const std::string scp(in.scope); - if((scp == "dataset") || (scp == "node") || (scp == "job")) { - LOGGER_INFO( - "ADM_set_qos_constraints scope value is acceptable ({})", - in.scope); - out.ret = 0; - out.status = 0; - } else { - LOGGER_ERROR( - "ADM_set_qos_constraints scope value is not valid. Please use: dataset, node or job"); - } - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_set_qos_constraints) - -/** - * Returns a list of QoS constraints defined for an element identified for id. - * - * @param in.scope The scope being queried: dataset, node, or job. - * @param in.element_id A valid id for the element of interest, i.e. a resource - * ID, a node hostname, or a Job ID. - * @param out.list A list of QoS constraints that includes all the classes - * currently defined for the element as well as the values set for them. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_get_qos_constraints(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_get_qos_constraints_in_t in; - ADM_get_qos_constraints_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.list = nullptr; - - if(in.scope == nullptr) { - LOGGER_ERROR("ADM_get_qos_constraints(): invalid scope (nullptr)"); - } else if(in.element_id < 0) { - LOGGER_ERROR("ADM_get_qos_constraints(): invalid element_id (< 0)"); - } else { - LOGGER_INFO("ADM_get_qos_constraints({}, {})", in.scope, in.element_id); - - const std::string scp(in.scope); - - if((scp == "dataset") || (scp == "node") || (scp == "job")) { - LOGGER_INFO( - "ADM_get_qos_constraints scope value is acceptable ({})", - in.scope); - out.ret = 0; - out.list = "list"; - } else { - LOGGER_ERROR( - "ADM_get_qos_constraints scope value is not valid. Please use: dataset, node or job "); - } - } - - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_get_qos_constraints) - -/** - * Defines a new operation, with the code found in path. 
The code will be - * identified by the user-provided operation_id and will accept the arguments - * defined, using the next format "arg0, arg1, arg2, . . . ". - * - * @param in.path A valid path for the operation code. - * @param in.operation_id A user-defined operation_id for the operation. - * @param in.arguments A list of arguments for the operation. - * @param out.status A status code indicating whether the operation was - * successful. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_define_data_operation(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_define_data_operation_in_t in; - ADM_define_data_operation_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.path == nullptr) { - LOGGER_ERROR("ADM_define_data_operation(): invalid path (nullptr)"); - } else if(in.operation_id < 0) { - LOGGER_ERROR("ADM_define_data_operation(): invalid operation_id (< 0)"); - } else if(in.arguments == nullptr) { - LOGGER_ERROR( - "ADM_define_data_operation(): invalid arguments (nullptr)"); - } else { - LOGGER_INFO("ADM_define_data_operation ({}, {}, {})", in.path, - in.operation_id, in.arguments); - out.ret = 0; - out.status = 0; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_define_data_operation) - - -/** - * Connects and starts the data operation defined with operation_id and with the - * arguments, using the input and output data storage (i.e., files). If the - * operation can be executed in a streaming fashion (i.e., it can start even if - * the input data is not entirely available), the stream parameter must be set - * to true. - * - * @param in.operation_id The operation_id of the operation to be connected. - * @param in.input An input data resource for the operation. - * @param in.stream A stream boolean indicating if the operation should be - * executed in a streaming fashion. - * @param in.arguments The values for the arguments required by the operation. - * @param in.job_id A job_id identifying the originating job. - * @param out.data An output data resource where the result of the operation - * should be stored. - * @return out.operation_handle An operation_handle for the operation that - * allows clients to further interact with the operation (e.g query its status, - * cancel it, etc.). - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. 
- */ -static void -ADM_connect_data_operation(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_connect_data_operation_in_t in; - ADM_connect_data_operation_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.data = nullptr; - out.operation_handle = nullptr; - - if(in.operation_id < 0) { - LOGGER_ERROR( - "ADM_connect_data_operation(): invalid operation_id (< 0)"); - } else if(in.input == nullptr) { - LOGGER_ERROR("ADM_define_data_operation(): invalid input (nullptr)"); - } else if(in.stream != true && in.stream != false) { - LOGGER_ERROR( - "ADM_connect_data_operation(): invalid stream (not true/false)"); - } else if(in.arguments == nullptr) { - LOGGER_ERROR( - "ADM_connect_data_operation(): invalid arguments (nullptr)"); - } else if(in.job_id < 0) { - LOGGER_ERROR("ADM_connect_data_operation(): invalid job_id (< 0)"); - } else { - LOGGER_INFO("ADM_connect_data_operation({}, {}, {}, {}, {})", - in.operation_id, in.input, in.stream, in.arguments, - in.job_id); - out.ret = 0; - out.data = "ouput"; - out.operation_handle = "operation_handle"; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_connect_data_operation) - -/** - * Finalises the operation defined with operation_id. - * - * @param in.operation_id The operation_id of the operation to be connected. - * @return out.status A status code indicating whether the operation was - * successful. - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_finalize_data_operation(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_finalize_data_operation_in_t in; - ADM_finalize_data_operation_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.status = -1; - - if(in.operation_id < 0) { - LOGGER_ERROR( - "ADM_finalize_data_operation(): invalid operation_id (< 0)"); - } else { - LOGGER_INFO("ADM_finalize_data_operation({})", in.operation_id); - out.ret = 0; - out.status = 0; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_finalize_data_operation) - -/** - * Links the data operation defined with operation_id with the pending transfer - * identified by transf er_id using the values provided as arguments. If the - * operation can be executed in a streaming fashion (i.e., it can start even if - * the input data is not entirely available), the stream parameter must be set - * to true. - * - * @param in.operation_id The operation_id of the operation to be connected. - * @param in.transfer_id The transfer_id of the pending transfer the operation - * should be linked to. - * @param in.stream A stream boolean indicating if the operation should be - * executed in a streaming fashion. - * @param in.arguments The values for the arguments required by the operation. - * @param in.job_id A job_id identifying the originating job. 
- * @return out.operation_handle An operation_handle for the operation that - * allows clients to further interact with the operation (e.g query its status, - * cancel it, etc.). - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. - */ -static void -ADM_link_transfer_to_data_operation(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_link_transfer_to_data_operation_in_t in; - ADM_link_transfer_to_data_operation_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.operation_handle = nullptr; - - if(in.operation_id < 0) { - LOGGER_ERROR( - "ADM_link_transfer_to_data_operation(): invalid operation_id (< 0)"); - } else if(in.transfer_id < 0) { - LOGGER_ERROR( - "ADM_link_transfer_to_data_operation(): invalid transfer_id (< 0)"); - } else if(in.arguments == nullptr) { - LOGGER_ERROR( - "ADM_link_transfer_to_data_operation(): invalid arguments (nullptr)"); - } else if(in.stream != true && in.stream != false) { - LOGGER_ERROR( - "ADM_link_transfer_to_data_operation(): invalid stream (not true/false)"); - } else if(in.job_id < 0) { - LOGGER_ERROR( - "ADM_link_transfer_to_data_operation(): invalid job_id (< 0)"); - } else { - LOGGER_INFO("ADM_link_transfer_to_data_operation ({}, {}, {}, {}, {})", - in.operation_id, in.transfer_id, in.stream, in.arguments, - in.job_id); - out.ret = 0; - out.operation_handle = "operation_handle"; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_link_transfer_to_data_operation) - - -/** - * Returns the current I/O statistics for a specified job_id and an optional - * corresponding job_step. The information will be returned in an - * easy-to-process format, e.g., JSON (see Listing 3.1). - * - * @param in.job_id - * @param in.job_step - * @return out.job_statistics - * @return out.ret Returns if the remote procedure has been completed - * successfully or not. 
- */ -static void -ADM_get_statistics(hg_handle_t h) { - - [[maybe_unused]] hg_return_t ret; - - ADM_get_statistics_in_t in; - ADM_get_statistics_out_t out; - - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - ret = margo_get_input(h, &in); - assert(ret == HG_SUCCESS); - - out.ret = -1; - out.job_statistics = nullptr; - - if(in.job_id < 0) { - LOGGER_ERROR("ADM_get_statistics(): invalid job_id (< 0)"); - } else if(in.job_step < 0) { - LOGGER_ERROR("ADM_get_statistics(): invalid job_step (< 0)"); - } else { - LOGGER_INFO("ADM_get_statistics ({}, {})", in.job_id, in.job_step); - out.ret = 0; - out.job_statistics = "job_statistics"; - } - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); - - ret = margo_free_input(h, &in); - assert(ret == HG_SUCCESS); - - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); -} - -DEFINE_MARGO_RPC_HANDLER(ADM_get_statistics) diff --git a/src/scord/rpc_handlers.hpp b/src/scord/rpc_handlers.hpp index c5624b26..d901fb39 100644 --- a/src/scord/rpc_handlers.hpp +++ b/src/scord/rpc_handlers.hpp @@ -25,6 +25,7 @@ #ifndef SCORD_RPC_HANDLERS_HPP #define SCORD_RPC_HANDLERS_HPP +#include #include #include @@ -81,98 +82,4 @@ transfer_datasets(const request& req, admire::job_id job_id, } // namespace scord::network::handlers -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// FIXME: cannot be in a namespace due to Margo limitations -// namespace scord::network::rpc { - -/// ADM_input -DECLARE_MARGO_RPC_HANDLER(ADM_input); - -/// ADM_output -DECLARE_MARGO_RPC_HANDLER(ADM_output); - -/// ADM_inout -DECLARE_MARGO_RPC_HANDLER(ADM_inout); - -/// ADM_adhoc_context -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_context); - -/// ADM_adhoc_context_id -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_context_id); - -/// ADM_adhoc_nodes -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_nodes) - -/// ADM_adhoc_walltime -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_walltime); - - -/// ADM_adhoc_access -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_access); - -/// ADM_adhoc_distribution -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_distribution); - -/// ADM_adhoc_background_flush -DECLARE_MARGO_RPC_HANDLER(ADM_adhoc_background_flush); - -/// ADM_in_situ_ops -DECLARE_MARGO_RPC_HANDLER(ADM_in_situ_ops); - -/// ADM_in_transit_ops -DECLARE_MARGO_RPC_HANDLER(ADM_in_transit_ops); - - -/// ADM_set_dataset_information -DECLARE_MARGO_RPC_HANDLER(ADM_set_dataset_information); - -/// ADM_set_io_resources -DECLARE_MARGO_RPC_HANDLER(ADM_set_io_resources); - -/// ADM_get_transfer_priority -DECLARE_MARGO_RPC_HANDLER(ADM_get_transfer_priority); - -/// ADM_set_transfer_priority -DECLARE_MARGO_RPC_HANDLER(ADM_set_transfer_priority); - -/// ADM_cancel_transfer -DECLARE_MARGO_RPC_HANDLER(ADM_cancel_transfer); - -/// ADM_get_pending_transfers -DECLARE_MARGO_RPC_HANDLER(ADM_get_pending_transfers); - -/// ADM_set_qos_constraints -DECLARE_MARGO_RPC_HANDLER(ADM_set_qos_constraints); - -/// ADM_get_qos_constraints -DECLARE_MARGO_RPC_HANDLER(ADM_get_qos_constraints); - -/// ADM_define_data_operation -DECLARE_MARGO_RPC_HANDLER(ADM_define_data_operation); - -/// ADM_connect_data_operation -DECLARE_MARGO_RPC_HANDLER(ADM_connect_data_operation); - -/// ADM_finalize_data_operation -DECLARE_MARGO_RPC_HANDLER(ADM_finalize_data_operation); - -/// ADM_link_transfer_to_data_operation -DECLARE_MARGO_RPC_HANDLER(ADM_link_transfer_to_data_operation); - -/// ADM_get_statistics -DECLARE_MARGO_RPC_HANDLER(ADM_get_statistics); - - -//} // namespace scord::network::rpc - -#ifdef __cplusplus -}; -#endif - #endif // SCORD_RPC_HANDLERS_HPP -// 
clang-format on diff --git a/src/scord/scord.cpp b/src/scord/scord.cpp index 33217769..3615a8bf 100644 --- a/src/scord/scord.cpp +++ b/src/scord/scord.cpp @@ -33,7 +33,6 @@ #include #include -#include #include #include "rpc_handlers.hpp" #include "env.hpp" @@ -200,156 +199,6 @@ main(int argc, char* argv[]) { #undef EXPAND -#if 0 - const auto rpc_registration_cb = [](auto&& ctx) { - LOGGER_INFO(" * Registering RPCs handlers..."); - - REGISTER_RPC(ctx, "ADM_ping", void, ADM_ping_out_t, ADM_ping, true); - - REGISTER_RPC(ctx, "ADM_register_job", ADM_register_job_in_t, - ADM_register_job_out_t, ADM_register_job, true); - REGISTER_RPC(ctx, "ADM_update_job", ADM_update_job_in_t, - ADM_update_job_out_t, ADM_update_job, true); - REGISTER_RPC(ctx, "ADM_remove_job", ADM_remove_job_in_t, - ADM_remove_job_out_t, ADM_remove_job, true); - - REGISTER_RPC(ctx, "ADM_register_adhoc_storage", - ADM_register_adhoc_storage_in_t, - ADM_register_adhoc_storage_out_t, - ADM_register_adhoc_storage, true); - REGISTER_RPC(ctx, "ADM_update_adhoc_storage", - ADM_update_adhoc_storage_in_t, - ADM_update_adhoc_storage_out_t, - ADM_update_adhoc_storage, true); - REGISTER_RPC(ctx, "ADM_remove_adhoc_storage", - ADM_remove_adhoc_storage_in_t, - ADM_remove_adhoc_storage_out_t, - ADM_remove_adhoc_storage, true); - - REGISTER_RPC(ctx, "ADM_deploy_adhoc_storage", - ADM_deploy_adhoc_storage_in_t, - ADM_deploy_adhoc_storage_out_t, - ADM_deploy_adhoc_storage, true); - - REGISTER_RPC(ctx, "ADM_register_pfs_storage", - ADM_register_pfs_storage_in_t, - ADM_register_pfs_storage_out_t, - ADM_register_pfs_storage, true); - REGISTER_RPC( - ctx, "ADM_update_pfs_storage", ADM_update_pfs_storage_in_t, - ADM_update_pfs_storage_out_t, ADM_update_pfs_storage, true); - REGISTER_RPC( - ctx, "ADM_remove_pfs_storage", ADM_remove_pfs_storage_in_t, - ADM_remove_pfs_storage_out_t, ADM_remove_pfs_storage, true); - - REGISTER_RPC(ctx, "ADM_input", ADM_input_in_t, ADM_input_out_t, - ADM_input, true); - - REGISTER_RPC(ctx, "ADM_output", ADM_output_in_t, ADM_output_out_t, - ADM_output, true); - - REGISTER_RPC(ctx, "ADM_inout", ADM_inout_in_t, ADM_inout_out_t, - ADM_inout, true); - - REGISTER_RPC(ctx, "ADM_adhoc_context", ADM_adhoc_context_in_t, - ADM_adhoc_context_out_t, ADM_adhoc_context, true); - - REGISTER_RPC(ctx, "ADM_adhoc_context_id", ADM_adhoc_context_id_in_t, - ADM_adhoc_context_id_out_t, ADM_adhoc_context_id, - true); - - REGISTER_RPC(ctx, "ADM_adhoc_nodes", ADM_adhoc_nodes_in_t, - ADM_adhoc_nodes_out_t, ADM_adhoc_nodes, true); - - REGISTER_RPC(ctx, "ADM_adhoc_walltime", ADM_adhoc_walltime_in_t, - ADM_adhoc_walltime_out_t, ADM_adhoc_walltime, true); - - REGISTER_RPC(ctx, "ADM_adhoc_access", ADM_adhoc_access_in_t, - ADM_adhoc_access_out_t, ADM_adhoc_access, true); - - REGISTER_RPC( - ctx, "ADM_adhoc_distribution", ADM_adhoc_distribution_in_t, - ADM_adhoc_distribution_out_t, ADM_adhoc_distribution, true); - - REGISTER_RPC(ctx, "ADM_adhoc_background_flush", - ADM_adhoc_background_flush_in_t, - ADM_adhoc_background_flush_out_t, - ADM_adhoc_background_flush, true); - - REGISTER_RPC(ctx, "ADM_in_situ_ops", ADM_in_situ_ops_in_t, - ADM_in_situ_ops_out_t, ADM_in_situ_ops, true); - - REGISTER_RPC(ctx, "ADM_in_transit_ops", ADM_in_transit_ops_in_t, - ADM_in_transit_ops_out_t, ADM_in_transit_ops, true); - - REGISTER_RPC( - ctx, "ADM_transfer_datasets", ADM_transfer_datasets_in_t, - ADM_transfer_datasets_out_t, ADM_transfer_datasets, true); - - REGISTER_RPC(ctx, "ADM_set_dataset_information", - ADM_set_dataset_information_in_t, - 
ADM_set_dataset_information_out_t, - ADM_set_dataset_information, true); - - REGISTER_RPC(ctx, "ADM_set_io_resources", ADM_set_io_resources_in_t, - ADM_set_io_resources_out_t, ADM_set_io_resources, - true); - - REGISTER_RPC(ctx, "ADM_get_transfer_priority", - ADM_get_transfer_priority_in_t, - ADM_get_transfer_priority_out_t, - ADM_get_transfer_priority, true); - - REGISTER_RPC(ctx, "ADM_set_transfer_priority", - ADM_set_transfer_priority_in_t, - ADM_set_transfer_priority_out_t, - ADM_set_transfer_priority, true); - - REGISTER_RPC(ctx, "ADM_cancel_transfer", ADM_cancel_transfer_in_t, - ADM_cancel_transfer_out_t, ADM_cancel_transfer, true); - - REGISTER_RPC(ctx, "ADM_get_pending_transfers", - ADM_get_pending_transfers_in_t, - ADM_get_pending_transfers_out_t, - ADM_get_pending_transfers, true); - - REGISTER_RPC(ctx, "ADM_set_qos_constraints", - ADM_set_qos_constraints_in_t, - ADM_set_qos_constraints_out_t, ADM_set_qos_constraints, - true); - - REGISTER_RPC(ctx, "ADM_get_qos_constraints", - ADM_get_qos_constraints_in_t, - ADM_get_qos_constraints_out_t, ADM_get_qos_constraints, - true); - - REGISTER_RPC(ctx, "ADM_define_data_operation", - ADM_define_data_operation_in_t, - ADM_define_data_operation_out_t, - ADM_define_data_operation, true); - - REGISTER_RPC(ctx, "ADM_connect_data_operation", - ADM_connect_data_operation_in_t, - ADM_connect_data_operation_out_t, - ADM_connect_data_operation, true); - - REGISTER_RPC(ctx, "ADM_finalize_data_operation", - ADM_finalize_data_operation_in_t, - ADM_finalize_data_operation_out_t, - ADM_finalize_data_operation, true); - - REGISTER_RPC(ctx, "ADM_link_transfer_to_data_operation", - ADM_link_transfer_to_data_operation_in_t, - ADM_link_transfer_to_data_operation_out_t, - ADM_link_transfer_to_data_operation, true); - - REGISTER_RPC(ctx, "ADM_get_statistics", ADM_get_statistics_in_t, - ADM_get_statistics_out_t, ADM_get_statistics, true); - - // TODO: add internal RPCs for communication with scord-ctl - }; -#endif - return daemon.run(); } catch(const std::exception& ex) { fmt::print(stderr, -- GitLab From 88d23c2bd606a7f308b26b6ddeee2ce19f045f7d Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 16:40:05 +0100 Subject: [PATCH 20/23] scord_ctl: Update RPCs to mochi-thalium --- src/scord-ctl/rpc_handlers.cpp | 35 ++++++++++++++-------------------- src/scord-ctl/rpc_handlers.hpp | 15 +++++++-------- src/scord-ctl/scord-ctl.cpp | 15 ++++++--------- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/src/scord-ctl/rpc_handlers.cpp b/src/scord-ctl/rpc_handlers.cpp index 3aad2c7e..274623df 100644 --- a/src/scord-ctl/rpc_handlers.cpp +++ b/src/scord-ctl/rpc_handlers.cpp @@ -23,8 +23,7 @@ *****************************************************************************/ #include -#include -#include +#include #include "rpc_handlers.hpp" struct remote_procedure { @@ -35,35 +34,29 @@ struct remote_procedure { } }; -static void -ADM_ping(hg_handle_t h) { +namespace scord::network::handlers { - using scord::network::utils::get_address; +void +ping(const scord::network::request& req) { - [[maybe_unused]] hg_return_t ret; + using scord::network::generic_response; + using scord::network::get_address; - [[maybe_unused]] margo_instance_id mid = margo_hg_handle_get_instance(h); - - const auto id = remote_procedure::new_id(); + const auto rpc_id = remote_procedure::new_id(); LOGGER_INFO("rpc id: {} name: {} from: {} => " "body: {{}}", - id, std::quoted(__FUNCTION__), std::quoted(get_address(h))); + rpc_id, std::quoted(__FUNCTION__), + 
std::quoted(get_address(req))); - ADM_ping_out_t out; - out.op_id = id; - out.retval = ADM_SUCCESS; + const auto resp = generic_response{rpc_id, admire::error_code::success}; LOGGER_INFO("rpc id: {} name: {} to: {} <= " "body: {{retval: {}}}", - id, std::quoted(__FUNCTION__), std::quoted(get_address(h)), - ADM_SUCCESS); - - ret = margo_respond(h, &out); - assert(ret == HG_SUCCESS); + rpc_id, std::quoted(__FUNCTION__), + std::quoted(get_address(req)), admire::error_code::success); - ret = margo_destroy(h); - assert(ret == HG_SUCCESS); + req.respond(resp); } -DEFINE_MARGO_RPC_HANDLER(ADM_ping); +} // namespace scord::network::handlers diff --git a/src/scord-ctl/rpc_handlers.hpp b/src/scord-ctl/rpc_handlers.hpp index 633befa2..199ca912 100644 --- a/src/scord-ctl/rpc_handlers.hpp +++ b/src/scord-ctl/rpc_handlers.hpp @@ -25,16 +25,15 @@ #ifndef SCORD_CTL_RPC_HANDLERS_HPP #define SCORD_CTL_RPC_HANDLERS_HPP -#include +#include +#include +#include -#ifdef __cplusplus -extern "C" { -#endif +namespace scord::network::handlers { -DECLARE_MARGO_RPC_HANDLER(ADM_ping); +void +ping(const scord::network::request& req); -#ifdef __cplusplus -}; -#endif +} // namespace scord::network::handlers #endif // SCORD_CTL_RPC_HANDLERS_HPP diff --git a/src/scord-ctl/scord-ctl.cpp b/src/scord-ctl/scord-ctl.cpp index 375efea5..d616dabf 100644 --- a/src/scord-ctl/scord-ctl.cpp +++ b/src/scord-ctl/scord-ctl.cpp @@ -33,13 +33,13 @@ #include #include -#include #include #include "rpc_handlers.hpp" #include "env.hpp" namespace fs = std::filesystem; namespace bpo = boost::program_options; +using namespace std::literals; void print_version(const std::string& progname) { @@ -178,17 +178,14 @@ main(int argc, char* argv[]) { try { scord::network::server daemon(cfg); -#if 0 - const auto rpc_registration_cb = [](auto&& ctx) { - LOGGER_INFO(" * Registering RPCs handlers..."); +// convenience macro to ensure the names of an RPC and its handler +// always match +#define EXPAND(rpc_name) "ADM_" #rpc_name##s, scord::network::handlers::rpc_name - REGISTER_RPC(ctx, "ADM_ping", void, ADM_ping_out_t, ADM_ping, true); + daemon.set_handler(EXPAND(ping)); - // TODO: add internal RPCs for communication with scord - }; +#undef EXPAND - daemon.configure(cfg, rpc_registration_cb); -#endif return daemon.run(); } catch(const std::exception& ex) { fmt::print(stderr, -- GitLab From e65006a1c30bdbaffbb069a21603062c37cf1c7c Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 16:42:33 +0100 Subject: [PATCH 21/23] Cleanup: Remove unused files --- src/common/net/detail/address.hpp | 74 ------ src/common/net/engine.hpp | 424 ------------------------------ 2 files changed, 498 deletions(-) delete mode 100644 src/common/net/detail/address.hpp delete mode 100644 src/common/net/engine.hpp diff --git a/src/common/net/detail/address.hpp b/src/common/net/detail/address.hpp deleted file mode 100644 index 33455947..00000000 --- a/src/common/net/detail/address.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef SCORD_NETWORK_DETAIL_ADDRESS_HPP -#define SCORD_NETWORK_DETAIL_ADDRESS_HPP - -// C includes -#include - -namespace scord::network::detail { - -/** A simple RAII wrapper for hg_addr_t. This way we can keep track of - * generated mercury addresses both in enddpoints and in the address cache - * using std::shared_ptr
(), and only free them when the last referrer - * dies, which is convenient */ -struct address { - - static address - self_address(const hg_class_t* hg_class) { - - hg_addr_t self_addr; - hg_return_t ret = - HG_Addr_self(const_cast(hg_class), &self_addr); - - if(ret != HG_SUCCESS) { - throw std::runtime_error("Failed to retrieve self address: " + - std::string(HG_Error_to_string(ret))); - } - - return {hg_class, self_addr}; - } - - address() : m_hg_class(NULL), m_hg_addr(HG_ADDR_NULL) {} - - address(const hg_class_t* hg_class, hg_addr_t hg_addr) - : m_hg_class(hg_class), m_hg_addr(hg_addr) {} - - address(address&& rhs) - : m_hg_class(std::move(rhs.m_hg_class)), - m_hg_addr(std::move(rhs.m_hg_addr)) { - - rhs.m_hg_class = NULL; - rhs.m_hg_addr = HG_ADDR_NULL; - } - - address& - operator=(address&& rhs) { - - if(this != &rhs) { - m_hg_class = std::move(rhs.m_hg_class); - m_hg_addr = std::move(rhs.m_hg_addr); - - rhs.m_hg_class = NULL; - rhs.m_hg_addr = HG_ADDR_NULL; - } - - return *this; - } - - ~address() { - if(m_hg_class != NULL && m_hg_addr != HG_ADDR_NULL) { - HG_Addr_free(const_cast(m_hg_class), m_hg_addr); - } - } - - hg_addr_t - mercury_address() const { - return m_hg_addr; - } - - const hg_class_t* m_hg_class; - hg_addr_t m_hg_addr; -}; - -} // namespace scord::network::detail - -#endif // SCORD_NETWORK_DETAIL_ADDRESS_HPP diff --git a/src/common/net/engine.hpp b/src/common/net/engine.hpp deleted file mode 100644 index 5822fa50..00000000 --- a/src/common/net/engine.hpp +++ /dev/null @@ -1,424 +0,0 @@ -/****************************************************************************** - * Copyright 2021-2022, Barcelona Supercomputing Center (BSC), Spain - * - * This software was partially supported by the EuroHPC-funded project ADMIRE - * (Project ID: 956748, https://www.admire-eurohpc.eu). - * - * This file is part of scord. - * - * scord is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * scord is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with scord. If not, see . 
- * - * SPDX-License-Identifier: GPL-3.0-or-later - *****************************************************************************/ - -#ifndef SCORD_NETWORK_ENGINE_HPP -#define SCORD_NETWORK_ENGINE_HPP - -#include -#include -#include -#include -#include -#include "detail/address.hpp" - - -namespace scord::network { - -namespace detail { - -#define REGISTER_RPC(__engine, __func_name, __in_t, __out_t, __handler, \ - requires_response) \ - { \ - REGISTER_RPC_IMPL((__engine)->m_context->m_mid, \ - (__engine)->m_context->m_rpc_names, __func_name, \ - __in_t, __out_t, __handler, requires_response); \ - } - -#define REGISTER_RPC_IMPL(__mid, __rpc_names, __func_name, __in_t, __out_t, \ - __handler, requires_response) \ - { \ - hg_id_t id = margo_provider_register_name( \ - __mid, __func_name, BOOST_PP_CAT(hg_proc_, __in_t), \ - BOOST_PP_CAT(hg_proc_, __out_t), _handler_for_##__handler, \ - MARGO_DEFAULT_PROVIDER_ID, ABT_POOL_NULL); \ - (__rpc_names).emplace(__func_name, id); \ - if(!(requires_response)) { \ - ::margo_registered_disable_response(__mid, id, HG_TRUE); \ - } \ - } - -#define _handler_for___null NULL - -struct margo_context { - - explicit margo_context(::margo_instance_id mid) : m_mid(mid) {} - - margo_instance_id m_mid; - std::unordered_map m_rpc_names; -}; - -} // namespace detail - -// forward declarations -struct endpoint; - -namespace utils { - -std::string -get_address(hg_handle_t h); - -} // namespace utils - -struct engine { - - enum class execution_mode : bool { - server = MARGO_SERVER_MODE, - client = MARGO_CLIENT_MODE - }; - - explicit engine(std::string_view address, - execution_mode = execution_mode::client) { - struct margo_init_info info = MARGO_INIT_INFO_INITIALIZER; - - m_context = std::make_shared( - margo_init_ext(address.data(), MARGO_SERVER_MODE, &info)); - - if(m_context->m_mid == MARGO_INSTANCE_NULL) { - throw std::runtime_error("Margo initialization failed"); - } - } - - ~engine() { - if(m_context) { - ::margo_finalize(m_context->m_mid); - } - } - - void - listen() const { - - /* NOTE: there isn't anything else for the server to do at this point - * except wait for itself to be shut down. The - * margo_wait_for_finalize() call here yields to let Margo drive - * progress until that happens. - */ - ::margo_wait_for_finalize(m_context->m_mid); - } - - void - stop() { - ::margo_finalize(m_context->m_mid); - - // It is not safe to access m_margo_context->m_mid after the - // margo_finalize() call. Make sure that no other threads can do a - // double margo_finalize() (e.g when calling ~engine()) by resetting - // m_margo_context. 
- m_context.reset(); - } - - endpoint - lookup(const std::string& address) const; - - std::string - self_address() const { - - struct addr_handle { - addr_handle(margo_instance_id mid, hg_addr_t addr) - : m_mid(mid), m_addr(addr) {} - - ~addr_handle() { - if(m_addr) { - margo_addr_free(m_mid, m_addr); - } - } - - hg_addr_t - native() const { - return m_addr; - } - - margo_instance_id m_mid; - hg_addr_t m_addr; - }; - - const auto self_addr = addr_handle{ - m_context->m_mid, [mid = m_context->m_mid]() -> hg_addr_t { - hg_addr_t tmp; - - hg_return_t ret = margo_addr_self(mid, &tmp); - - if(ret != HG_SUCCESS) { - LOGGER_WARN(fmt::format( - "Error finding out self address: {}", - HG_Error_to_string(ret))); - return nullptr; - } - - return tmp; - }()}; - - if(!self_addr.native()) { - return "unknown"; - } - - hg_size_t expected_length; - hg_return_t ret = - margo_addr_to_string(m_context->m_mid, nullptr, - &expected_length, self_addr.native()); - - if(ret != HG_SUCCESS) { - LOGGER_WARN(fmt::format("Error finding out self address: {}", - HG_Error_to_string(ret))); - return "unknown"; - } - - std::vector tmp; - tmp.reserve(expected_length); - - ret = margo_addr_to_string(m_context->m_mid, tmp.data(), - &expected_length, self_addr.native()); - - if(ret != HG_SUCCESS) { - LOGGER_WARN(fmt::format("Error finding out self address: {}", - HG_Error_to_string(ret))); - return "unknown"; - } - - return {tmp.data()}; - } - - std::shared_ptr m_context; -}; - -template -class rpc_handle { -public: - rpc_handle(hg_handle_t handle, Output output) - : m_handle(handle), m_output(output) {} - - ~rpc_handle() { - - if(m_handle) { - - if(m_output) { - margo_free_output(m_handle, m_output); - } - - margo_destroy(m_handle); - } - } - - hg_handle_t - native() { - return m_handle; - } - - std::string - origin() const { - return utils::get_address(m_handle); - } - -private: - hg_handle_t m_handle; - Output m_output; -}; - - -struct endpoint { -private: - // Endpoints should only be created by calling engine::lookup() - friend class engine; - - endpoint(std::shared_ptr context, - std::shared_ptr address) - : m_margo_context(std::move(context)), m_address(std::move(address)) {} - -public: - endpoint(const endpoint& /*other*/) = default; - endpoint& - operator=(const endpoint& /*other*/) = default; - endpoint(endpoint&& /*rhs*/) = default; - endpoint& - operator=(endpoint&& /*rhs*/) = default; - - template - void - call(const std::string& id, Args&&... 
args) { - - const auto it = m_margo_context->m_rpc_names.find(id); - - if(it == m_margo_context->m_rpc_names.end()) { - throw std::runtime_error( - fmt::format("Unknown remote procedure: {}", id)); - } - - hg_handle_t handle; - auto ret = ::margo_create(m_margo_context->m_mid, - m_address->mercury_address(), it->second, - &handle); - if(ret != HG_SUCCESS) { - throw std::runtime_error( - fmt::format("Error during endpoint::call(): {}", - ::HG_Error_to_string(ret))); - } - - ret = ::margo_forward(handle, nullptr); - - if(ret != HG_SUCCESS) { - throw std::runtime_error( - fmt::format("Error during endpoint::call(): {}", - ::HG_Error_to_string(ret))); - } - - ret = ::margo_destroy(handle); - - if(ret != HG_SUCCESS) { - throw std::runtime_error( - fmt::format("Error during endpoint::call(): {}", - ::HG_Error_to_string(ret))); - } - } - - /** - * Deprecated call, used to support Margo directly - * - **/ - template - [[nodiscard]] rpc_handle - call(const std::string& id, T1 input = nullptr, T2 output = nullptr) { - - const auto it = m_margo_context->m_rpc_names.find(id); - - if(it == m_margo_context->m_rpc_names.end()) { - throw std::runtime_error( - fmt::format("Unknown remote procedure: {}", id)); - } - - hg_handle_t handle; - auto ret = ::margo_create(m_margo_context->m_mid, - m_address->mercury_address(), it->second, - &handle); - if(ret != HG_SUCCESS) { - throw std::runtime_error( - fmt::format("Error during endpoint::call(): {}", - ::HG_Error_to_string(ret))); - } - - ret = ::margo_forward(handle, input); - - if(ret != HG_SUCCESS) { - throw std::runtime_error( - fmt::format("Error during endpoint::call(): {}", - ::HG_Error_to_string(ret))); - } - - if(output != nullptr) { - ret = ::margo_get_output(handle, output); - } - - return rpc_handle{handle, output}; - } - -private: - std::shared_ptr m_margo_context; - std::shared_ptr m_address; -}; - -// now that we have the complete definition of engine and endpoint, we can -// finally define engine::lookup completely -inline endpoint -engine::lookup(const std::string& address) const { - - hg_addr_t svr_addr; - auto ret = - ::margo_addr_lookup(m_context->m_mid, address.c_str(), &svr_addr); - if(ret != HG_SUCCESS) { - throw std::runtime_error( - fmt::format("Error during engine::lookup(): {}", - ::HG_Error_to_string(ret))); - } - - return {m_context, std::make_shared( - ::margo_get_class(m_context->m_mid), svr_addr)}; -} - - -struct rpc_client : engine { - explicit rpc_client(const std::string& protocol) - : engine(protocol, execution_mode::client) {} - - template - rpc_client(const std::string& protocol, - Callback&& rpc_registration_callback) - : engine(protocol, execution_mode::client) { - rpc_registration_callback(this); - } -}; - -struct rpc_acceptor : engine { - - static std::string - format_address(const std::string& protocol, const std::string& address, - int port) { - return fmt::format("{}://{}:{}", protocol, address, port); - } - - rpc_acceptor(const std::string& protocol, const std::string& bind_address, - int port) - : engine(format_address(protocol, bind_address, port)) {} -}; - -namespace utils { - -inline std::string -get_address(hg_handle_t h) { - - const hg_info* hgi = margo_get_info(h); - - if(!hgi) { - LOGGER_WARN("Unable to get information from hg_handle"); - return "unknown"; - } - - margo_instance_id mid = margo_hg_handle_get_instance(h); - - hg_size_t expected_length; - hg_return_t ret = - margo_addr_to_string(mid, nullptr, &expected_length, hgi->addr); - - if(ret != HG_SUCCESS) { - LOGGER_WARN("Error finding out client 
address: {}", - HG_Error_to_string(ret)); - return "unknown"; - } - - std::vector tmp; - tmp.reserve(expected_length); - - ret = margo_addr_to_string(mid, tmp.data(), &expected_length, hgi->addr); - - if(ret != HG_SUCCESS) { - LOGGER_WARN("Error finding out client address: {}", - HG_Error_to_string(ret)); - return "unknown"; - } - - return {tmp.data()}; -} - -} // namespace utils - -} // namespace scord::network - -#endif // SCORD_NETWORK_ENGINE_HPP -- GitLab From 2d426c7e2fd754b60636c8de9df131f2d483e6a9 Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Fri, 10 Feb 2023 14:50:04 +0100 Subject: [PATCH 22/23] rpc_client: Prevent Thallium automatic conversions when not needed --- src/common/net/endpoint.hpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/common/net/endpoint.hpp b/src/common/net/endpoint.hpp index 4f0dee24..208498ea 100644 --- a/src/common/net/endpoint.hpp +++ b/src/common/net/endpoint.hpp @@ -41,24 +41,16 @@ public: address() const; template - auto + inline std::optional> call(const std::string& rpc_name, Args&&... args) const { - // deduce the return type of the expression in the try-block below so - // that we know the type to return within std::optional - using rpc_function_type = - decltype(m_engine->define(std::declval())); - using rpc_return_type = decltype(std::declval().on( - m_endpoint)(std::forward(args)...)); - using return_type = std::optional; - try { - const auto& rpc = m_engine->define(rpc_name); - const auto& rv = rpc.on(m_endpoint)(std::forward(args)...); - return return_type{rv}; + const auto rpc = m_engine->define(rpc_name); + return std::make_optional( + rpc.on(m_endpoint)(std::forward(args)...)); } catch(const std::exception& ex) { LOGGER_ERROR("endpoint::call() failed: {}", ex.what()); - return return_type{}; + return std::nullopt; } } -- GitLab From 4822db5b041096eff674bc24368a25dc2fb511ff Mon Sep 17 00:00:00 2001 From: Alberto Miranda Date: Tue, 7 Feb 2023 21:37:56 +0100 Subject: [PATCH 23/23] Update Docker images for 0.2.0-wip[-debug] --- docker/0.2.0-wip-debug/Dockerfile | 170 ++++++++++++++++++ docker/0.2.0-wip-debug/Makefile | 4 + .../patches/mochi-thallium.patch | 85 +++++++++ docker/0.2.0-wip/Dockerfile | 163 +++++++++++------ docker/0.2.0-wip/patches/mochi-thallium.patch | 85 +++++++++ 5 files changed, 450 insertions(+), 57 deletions(-) create mode 100644 docker/0.2.0-wip-debug/Dockerfile create mode 100644 docker/0.2.0-wip-debug/Makefile create mode 100644 docker/0.2.0-wip-debug/patches/mochi-thallium.patch create mode 100644 docker/0.2.0-wip/patches/mochi-thallium.patch diff --git a/docker/0.2.0-wip-debug/Dockerfile b/docker/0.2.0-wip-debug/Dockerfile new file mode 100644 index 00000000..0caa65c0 --- /dev/null +++ b/docker/0.2.0-wip-debug/Dockerfile @@ -0,0 +1,170 @@ +FROM debian:testing-slim + +LABEL Description="Debian-based environment suitable to build scord" + +ENV DEPS_INSTALL_PATH /usr/local + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + curl \ + ca-certificates \ + libtool \ + pkg-config \ + make \ + automake \ + gcc \ + g++ \ + g++-11 \ + vim \ + gdb \ + procps \ + # AGIOS dependencies + libconfig-dev \ + # Mercury dependencies + libltdl-dev \ + lbzip2 \ + # Margo dependencies \ + libjson-c-dev \ + # GekkoFS dependencies + libboost-program-options-dev \ + uuid-dev \ + python3 \ + libyaml-dev libcurl4-openssl-dev procps \ + # genopts dependencies + python3-venv \ + # redis-plus-plus dependencies \ + libhiredis-dev \ + # tests dependencies \ + python3-pip 
&& \ + ### install cmake 3.23.1 ################################################### + curl -OL https://github.com/Kitware/CMake/releases/download/v3.23.1/cmake-3.23.1-Linux-x86_64.sh && \ + chmod u+x ./cmake-3.23.1-Linux-x86_64.sh && \ + ./cmake-3.23.1-Linux-x86_64.sh --skip-license --prefix=${DEPS_INSTALL_PATH} && \ + rm ./cmake-3.23.1-Linux-x86_64.sh && \ + ########################################################################### + ### DEPENDENCIES + ########################################################################### \ + cd && \ + mkdir deps && cd deps && \ + git clone https://github.com/jbeder/yaml-cpp --recurse-submodules && \ + git clone https://github.com/json-c/json-c --recurse-submodules && \ + git clone https://github.com/ofiwg/libfabric --recurse-submodules && \ + git clone https://github.com/pmodels/argobots --recurse-submodules && \ + git clone https://github.com/mercury-hpc/mercury --recurse-submodules && \ + git clone https://github.com/mochi-hpc/mochi-margo --recurse-submodules && \ +# cd mochi-margo && git reset --hard v0.9.9 && cd .. && \ + git clone https://github.com/sewenew/redis-plus-plus --recurse-submodules && \ + git clone https://github.com/francielizanon/agios --recurse-submodules && \ + cd agios && git checkout development && cd .. && \ + git clone https://github.com/USCiLab/cereal --recurse-submodules && \ + git clone https://github.com/mochi-hpc/mochi-thallium --recurse-submodules && \ + cd mochi-thallium && \ + export LD_LIBRARY_PATH=${DEPS_INSTALL_PATH}/lib:${DEPS_INSTALL_PATH}/lib64 && \ + export PKG_CONFIG_PATH=${DEPS_INSTALL_PATH}/lib/pkgconfig:${DEPS_INSTALL_PATH}/lib64/pkgconfig && \ + cd && \ + \ + ### yaml-cpp + cd deps/yaml-cpp && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DYAML_CPP_BUILD_TESTS=OFF \ + .. && \ + make -j install && \ + cd .. && rm -rf build && cd && \ + \ + ### argobots + cd deps/argobots && \ + ./autogen.sh && \ + mkdir build && cd build && \ + CFLAGS="-ggdb3 -O0" ../configure --prefix=${DEPS_INSTALL_PATH} && \ + make install -j && \ + cd .. && rm -rf build && cd && \ + cd deps/libfabric && \ + \ + ### libfabric + ./autogen.sh && \ + mkdir build && cd build && \ + CFLAGS="-ggdb3 -O0" ../configure --prefix=${DEPS_INSTALL_PATH} && \ + make install -j && \ + cd .. && rm -rf build && cd && \ + \ + ### mercury + cd deps/mercury && \ + mkdir build && cd build && \ + cmake -DMERCURY_USE_SELF_FORWARD:BOOL=ON \ + -DBUILD_TESTING:BOOL=ON \ + -DMERCURY_USE_BOOST_PP:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DNA_USE_OFI:BOOL=ON \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_BUILD_TYPE:STRING=Debug \ + .. && \ + make install -j && \ + cd .. && rm -rf build && cd && \ + \ + ### json-c + cd deps/json-c && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + .. && \ + make install -j && \ + cd .. && rm -rf build && cd && \ + \ + ### mochi-margo + cd deps/mochi-margo && \ + ./prepare.sh && \ + mkdir build && cd build && \ + CFLAGS="-ggdb3 -O0" ../configure --prefix=${DEPS_INSTALL_PATH} && \ + make -j install && \ + cd .. && rm -rf build && cd && \ + \ + ### redis-plus-plus + cd deps/redis-plus-plus && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DCMAKE_BUILD_TYPE:STRING=Debug \ + .. && \ + make install -j && \ + cd .. 
&& rm -rf build && cd && \ + \ + ### agios + cd deps/agios && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DCMAKE_BUILD_TYPE:STRING=Debug \ + .. && \ + make install -j && \ + cd .. && rm -rf build && cd && \ + \ + ### cereal + cd deps/cereal && \ + mkdir build && cd build && \ + cmake -DCMAKE_BUILD_TYPE:STRING=Debug \ + -DBUILD_DOC:BOOL=OFF \ + -DBUILD_SANDBOX:BOOL=OFF \ + -DBUILD_TESTS:BOOL=OFF \ + -DSKIP_PERFORMANCE_COMPARISON:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + .. && \ + make -j install && \ + cd .. && rm -rf build && cd && \ + \ + ### mochi-thallium + cd deps/mochi-thallium && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DCMAKE_BUILD_TYPE:STRING=Debug \ + .. && \ + make -j install && \ + cd .. && rm -rf build && cd && \ + \ + ### python packages for testing scripts\ + pip install lark loguru && \ + \ + ### Cleanup + # Clean apt cache to reduce image layer size + rm -rf /var/lib/apt/lists/* && \ + # Clean apt caches of packages + apt-get clean && apt-get autoclean diff --git a/docker/0.2.0-wip-debug/Makefile b/docker/0.2.0-wip-debug/Makefile new file mode 100644 index 00000000..83601860 --- /dev/null +++ b/docker/0.2.0-wip-debug/Makefile @@ -0,0 +1,4 @@ +.PHONY: all + +all: + docker build -t bscstorage/scord:0.2.0-wip-debug . diff --git a/docker/0.2.0-wip-debug/patches/mochi-thallium.patch b/docker/0.2.0-wip-debug/patches/mochi-thallium.patch new file mode 100644 index 00000000..f9ba1f73 --- /dev/null +++ b/docker/0.2.0-wip-debug/patches/mochi-thallium.patch @@ -0,0 +1,85 @@ +diff --git a/include/thallium/packed_data.hpp b/include/thallium/packed_data.hpp +index 9e6e76e..37e64d3 100644 +--- a/include/thallium/packed_data.hpp ++++ b/include/thallium/packed_data.hpp +@@ -14,13 +14,13 @@ + + namespace thallium { + +-template class callable_remote_procedure_with_context; ++template class callable_remote_procedure_with_context; + class async_response; +-template class request_with_context; ++template class request_with_context; + using request = request_with_context<>; + + namespace detail { +- struct engine_impl; ++struct engine_impl; + } + + /** +@@ -36,9 +36,9 @@ class packed_data { + + private: + std::weak_ptr m_engine_impl; +- hg_handle_t m_handle = HG_HANDLE_NULL; +- hg_return_t (*m_unpack_fn)(hg_handle_t,void*) = nullptr; +- hg_return_t (*m_free_fn)(hg_handle_t,void*) = nullptr; ++ hg_handle_t m_handle = HG_HANDLE_NULL; ++ hg_return_t (*m_unpack_fn)(hg_handle_t, void*) = nullptr; ++ hg_return_t (*m_free_fn)(hg_handle_t, void*) = nullptr; + mutable std::tuple m_context; + + /** +@@ -62,6 +62,41 @@ class packed_data { + MARGO_ASSERT(ret, margo_ref_incr); + } + ++ packed_data(const packed_data&) = delete; ++ packed_data& operator=(const packed_data&) = delete; ++ ++ packed_data(packed_data&& rhs) ++ : m_engine_impl(std::move(rhs.m_engine_impl), ++ m_context(std::move(rhs.m_context))) { ++ m_handle = rhs.m_handle; ++ rhs.m_handle = HG_HANDLE_NULL; ++ m_unpack_fn = rhs.m_unpack_fn; ++ rhs.m_unpack_fn = nullptr; ++ m_free_fn = rhs.m_free_fn; ++ rhs.m_free_fn = nullptr; ++ } ++ ++ packed_data& operator=(packed_data&& rhs) { ++ ++ if(&rhs == this) { ++ return *this; ++ } ++ ++ // the original members m_handle, m_context, and m_handle are being ++ // replaced here by the ones from rhs. 
It may be necessary to release ++ // their resources if `packed_data` has claimed ownership over them, ++ // otherwise we would be leaking ++ m_engine_impl = std::move(rhs.m_engine_impl); ++ m_context = std::move(rhs.m_context); ++ ++ m_handle = rhs.m_handle; ++ rhs.m_handle = HG_HANDLE_NULL; ++ m_unpack_fn = rhs.m_unpack_fn; ++ rhs.m_unpack_fn = nullptr; ++ m_free_fn = rhs.m_free_fn; ++ rhs.m_free_fn = nullptr; ++ } ++ + packed_data() = default; + + public: +@@ -78,7 +113,7 @@ class packed_data { + * @tparam NewCtxArg Types of the serialization context. + * @param args Context. + */ +- template ++ template + auto with_serialization_context(NewCtxArg&&... args) { + return packed_data...>( + m_unpack_fn, m_free_fn, m_handle, m_engine_impl, diff --git a/docker/0.2.0-wip/Dockerfile b/docker/0.2.0-wip/Dockerfile index b8dd77a8..2250ffce 100644 --- a/docker/0.2.0-wip/Dockerfile +++ b/docker/0.2.0-wip/Dockerfile @@ -2,8 +2,7 @@ FROM debian:bullseye-slim LABEL Description="Debian-based environment suitable to build scord" -ENV INSTALL_PATH /usr/local - +ENV DEPS_INSTALL_PATH /usr/local RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -30,88 +29,138 @@ RUN apt-get update && \ python3 \ libyaml-dev libcurl4-openssl-dev procps \ # genopts dependencies - python3-venv libhiredis-dev && \ - # install cmake 3.14 since it's needed for some dependencies + python3-venv \ + # redis-plus-plus dependencies \ + libhiredis-dev \ + # tests dependencies \ + python3-pip && \ + ### install cmake 3.23.1 ################################################### curl -OL https://github.com/Kitware/CMake/releases/download/v3.23.1/cmake-3.23.1-Linux-x86_64.sh && \ chmod u+x ./cmake-3.23.1-Linux-x86_64.sh && \ - ./cmake-3.23.1-Linux-x86_64.sh --skip-license --prefix=${INSTALL_PATH} && \ - # Clean apt cache to reduce image layer size - rm -rf /var/lib/apt/lists/* && \ - # Clean apt caches of packages - apt-get clean && apt-get autoclean && \ + ./cmake-3.23.1-Linux-x86_64.sh --skip-license --prefix=${DEPS_INSTALL_PATH} && \ rm ./cmake-3.23.1-Linux-x86_64.sh && \ - cmake --version && \ - curl -OL https://github.com/jbeder/yaml-cpp/archive/refs/tags/yaml-cpp-0.6.2.tar.gz && \ - tar -xzf yaml-cpp-0.6.2.tar.gz && \ - cd yaml-cpp-yaml-cpp-0.6.2 && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} .. && \ - make -j install && \ + ########################################################################### + ### DEPENDENCIES + ########################################################################### \ cd && \ - rm -rf yaml-cpp-yaml-cpp-0.6.2 && \ - mkdir deps && \ - cd deps && \ + mkdir deps && cd deps && \ git clone https://github.com/jbeder/yaml-cpp --recurse-submodules && \ git clone https://github.com/json-c/json-c --recurse-submodules && \ git clone https://github.com/ofiwg/libfabric --recurse-submodules && \ git clone https://github.com/pmodels/argobots --recurse-submodules && \ git clone https://github.com/mercury-hpc/mercury --recurse-submodules && \ git clone https://github.com/mochi-hpc/mochi-margo --recurse-submodules && \ +# cd mochi-margo && git reset --hard v0.9.9 && cd .. && \ git clone https://github.com/sewenew/redis-plus-plus --recurse-submodules && \ git clone https://github.com/francielizanon/agios --recurse-submodules && \ - export LD_LIBRARY_PATH=${INSTALL_PATH}/lib:${INSTALL_PATH}/lib64 && \ - export PKG_CONFIG_PATH=${INSTALL_PATH}/lib/pkgconfig:${INSTALL_PATH}/lib64/pkgconfig && \ + cd agios && git checkout development && cd .. 
&& \ + git clone https://github.com/USCiLab/cereal --recurse-submodules && \ + git clone https://github.com/mochi-hpc/mochi-thallium --recurse-submodules && \ + cd mochi-thallium && \ + export LD_LIBRARY_PATH=${DEPS_INSTALL_PATH}/lib:${DEPS_INSTALL_PATH}/lib64 && \ + export PKG_CONFIG_PATH=${DEPS_INSTALL_PATH}/lib/pkgconfig:${DEPS_INSTALL_PATH}/lib64/pkgconfig && \ cd && \ + \ + ### yaml-cpp cd deps/yaml-cpp && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DYAML_CPP_BUILD_TESTS=OFF .. && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DYAML_CPP_BUILD_TESTS=OFF \ + .. && \ make -j install && \ - cd ../../ && \ - cd argobots && \ + cd .. && rm -rf build && cd && \ + \ + ### argobots + cd deps/argobots && \ ./autogen.sh && \ - ./configure --prefix=${INSTALL_PATH} && \ + mkdir build && cd build && \ + ../configure --prefix=${DEPS_INSTALL_PATH} && \ make install -j && \ - cd .. && \ - cd libfabric && \ + cd .. && rm -rf build && cd && \ + cd deps/libfabric && \ + \ + ### libfabric ./autogen.sh && \ - ./configure --prefix=${INSTALL_PATH} && \ + mkdir build && cd build && \ + ../configure --prefix=${DEPS_INSTALL_PATH} && \ make install -j && \ - cd .. && \ - cd mercury && \ - mkdir build && \ - cd build && \ - cmake -DMERCURY_USE_SELF_FORWARD:BOOL=ON -DBUILD_TESTING:BOOL=ON -DMERCURY_USE_BOOST_PP:BOOL=ON -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} -DBUILD_SHARED_LIBS:BOOL=ON -DNA_USE_OFI:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE:STRING=Release .. && \ + cd .. && rm -rf build && cd && \ + \ + ### mercury + cd deps/mercury && \ + mkdir build && cd build && \ + cmake -DMERCURY_USE_SELF_FORWARD:BOOL=ON \ + -DBUILD_TESTING:BOOL=ON \ + -DMERCURY_USE_BOOST_PP:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DBUILD_SHARED_LIBS:BOOL=ON \ + -DNA_USE_OFI:BOOL=ON \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + .. && \ make install -j && \ - cd ../.. && \ - cd json-c && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} .. && \ + cd .. && rm -rf build && cd && \ + \ + ### json-c + cd deps/json-c && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + .. && \ make install -j && \ - cd ../../ && \ - cd mochi-margo && \ + cd .. && rm -rf build && cd && \ + \ + ### mochi-margo + cd deps/mochi-margo && \ ./prepare.sh && \ - PKG_CONFIG_PATH=${INSTALL_PATH}/lib/pkgconfig:${INSTALL_PATH}/lib64/pkgconfig ./configure --prefix=${INSTALL_PATH} && \ + mkdir build && cd build && \ + ../configure --prefix=${DEPS_INSTALL_PATH} && \ make -j install && \ - cd && \ + cd .. && rm -rf build && cd && \ + \ + ### redis-plus-plus cd deps/redis-plus-plus && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} .. && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + .. && \ make install -j && \ - cd ../../ && \ - cd && \ + cd .. && rm -rf build && cd && \ + \ + ### agios cd deps/agios && \ - git checkout development && \ - mkdir build && \ - cd build && \ - cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PATH} .. && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + .. && \ make install -j && \ - cd ../../ && \ - cd && \ - rm -rf deps && \ + cd .. 
&& rm -rf build && cd && \ + \ + ### cereal + cd deps/cereal && \ + mkdir build && cd build && \ + cmake -DCMAKE_BUILD_TYPE:STRING=Release \ + -DBUILD_DOC:BOOL=OFF \ + -DBUILD_SANDBOX:BOOL=OFF \ + -DBUILD_TESTS:BOOL=OFF \ + -DSKIP_PERFORMANCE_COMPARISON:BOOL=ON \ + -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + .. && \ + make -j install && \ + cd .. && rm -rf build && cd && \ + \ + ### mochi-thallium + cd deps/mochi-thallium && \ + mkdir build && cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${DEPS_INSTALL_PATH} \ + -DCMAKE_BUILD_TYPE:STRING=Release \ + .. && \ + make -j install && \ + cd .. && rm -rf build && cd && \ + \ + ### python packages for testing scripts\ + pip install lark loguru && \ + \ + ### Cleanup # Clean apt cache to reduce image layer size rm -rf /var/lib/apt/lists/* && \ # Clean apt caches of packages diff --git a/docker/0.2.0-wip/patches/mochi-thallium.patch b/docker/0.2.0-wip/patches/mochi-thallium.patch new file mode 100644 index 00000000..f9ba1f73 --- /dev/null +++ b/docker/0.2.0-wip/patches/mochi-thallium.patch @@ -0,0 +1,85 @@ +diff --git a/include/thallium/packed_data.hpp b/include/thallium/packed_data.hpp +index 9e6e76e..37e64d3 100644 +--- a/include/thallium/packed_data.hpp ++++ b/include/thallium/packed_data.hpp +@@ -14,13 +14,13 @@ + + namespace thallium { + +-template class callable_remote_procedure_with_context; ++template class callable_remote_procedure_with_context; + class async_response; +-template class request_with_context; ++template class request_with_context; + using request = request_with_context<>; + + namespace detail { +- struct engine_impl; ++struct engine_impl; + } + + /** +@@ -36,9 +36,9 @@ class packed_data { + + private: + std::weak_ptr m_engine_impl; +- hg_handle_t m_handle = HG_HANDLE_NULL; +- hg_return_t (*m_unpack_fn)(hg_handle_t,void*) = nullptr; +- hg_return_t (*m_free_fn)(hg_handle_t,void*) = nullptr; ++ hg_handle_t m_handle = HG_HANDLE_NULL; ++ hg_return_t (*m_unpack_fn)(hg_handle_t, void*) = nullptr; ++ hg_return_t (*m_free_fn)(hg_handle_t, void*) = nullptr; + mutable std::tuple m_context; + + /** +@@ -62,6 +62,41 @@ class packed_data { + MARGO_ASSERT(ret, margo_ref_incr); + } + ++ packed_data(const packed_data&) = delete; ++ packed_data& operator=(const packed_data&) = delete; ++ ++ packed_data(packed_data&& rhs) ++ : m_engine_impl(std::move(rhs.m_engine_impl), ++ m_context(std::move(rhs.m_context))) { ++ m_handle = rhs.m_handle; ++ rhs.m_handle = HG_HANDLE_NULL; ++ m_unpack_fn = rhs.m_unpack_fn; ++ rhs.m_unpack_fn = nullptr; ++ m_free_fn = rhs.m_free_fn; ++ rhs.m_free_fn = nullptr; ++ } ++ ++ packed_data& operator=(packed_data&& rhs) { ++ ++ if(&rhs == this) { ++ return *this; ++ } ++ ++ // the original members m_handle, m_context, and m_handle are being ++ // replaced here by the ones from rhs. It may be necessary to release ++ // their resources if `packed_data` has claimed ownership over them, ++ // otherwise we would be leaking ++ m_engine_impl = std::move(rhs.m_engine_impl); ++ m_context = std::move(rhs.m_context); ++ ++ m_handle = rhs.m_handle; ++ rhs.m_handle = HG_HANDLE_NULL; ++ m_unpack_fn = rhs.m_unpack_fn; ++ rhs.m_unpack_fn = nullptr; ++ m_free_fn = rhs.m_free_fn; ++ rhs.m_free_fn = nullptr; ++ } ++ + packed_data() = default; + + public: +@@ -78,7 +113,7 @@ class packed_data { + * @tparam NewCtxArg Types of the serialization context. + * @param args Context. + */ +- template ++ template + auto with_serialization_context(NewCtxArg&&... 
args) { + return packed_data...>( + m_unpack_fn, m_free_fn, m_handle, m_engine_impl, -- GitLab
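
For reference, the sketch below shows how the client-side interface touched by these patches is meant to be driven once the series is applied: lookup() reports failure through an empty std::optional instead of throwing, and, after patch 22/23, call() hands back the raw Thallium response wrapped in an std::optional (the bundled mochi-thallium.patch adds move construction to thallium::packed_data precisely so the response can be stored that way). This is a minimal usage sketch, not code from the patches: the include path, the ofi+tcp protocol string, the server address and the "ping" RPC name are illustrative assumptions.

// usage sketch only: include path, protocol, address and RPC name are
// assumptions made for illustration, not part of the patch series
#include <cstdlib>
#include "net/client.hpp" // assumed location of scord::network::client

int main() {
    // a Thallium engine in client mode, wrapped by the new client class
    scord::network::client rpc_client{"ofi+tcp"};

    // lookup() returns std::nullopt (and logs the error) if the address
    // cannot be resolved, rather than throwing like the old engine::lookup()
    auto endp = rpc_client.lookup("ofi+tcp://127.0.0.1:52000");

    if(!endp) {
        return EXIT_FAILURE;
    }

    // after patch 22/23, call() wraps the raw Thallium response in a
    // std::optional; converting it into a concrete value is left to the
    // caller and omitted here
    const auto reply = endp->call("ping");

    return reply.has_value() ? EXIT_SUCCESS : EXIT_FAILURE;
}

Keeping the error path in std::optional rather than exceptions matches what the patched client.cpp and endpoint.hpp already do internally: failures are logged via LOGGER_ERROR and surfaced as empty optionals, so callers decide locally how to react.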